TensorRT-LLM v0.18.2 release (#3611)

2026-01-13 22:18:36 +08:00 · 2025-04-16 14:42:50 +08:00 · 2025-04-16 14:42:50 +08:00 · 5aec7af45f
commit 5aec7af45f
parent 62f3c954b9
54 changed files with 146 additions and 77 deletions
--- a/README.md
+++ b/README.md
@ -9,7 +9,7 @@ TensorRT-LLM
 [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/)
 [![cuda](https://img.shields.io/badge/cuda-12.8.1-green)](https://developer.nvidia.com/cuda-downloads)
 [![trt](https://img.shields.io/badge/TRT-10.9.0-green)](https://developer.nvidia.com/tensorrt)
-[![version](https://img.shields.io/badge/release-0.18.1-green)](./tensorrt_llm/version.py)
+[![version](https://img.shields.io/badge/release-0.18.2-green)](./tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)

 [Architecture](./docs/source/architecture/overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Performance](./docs/source/performance/perf-overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Examples](./examples/)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentation](./docs/source/)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Roadmap](https://docs.google.com/presentation/d/1gycPmtdh7uUcH6laOvW65Dbp9F1McUkGDIcAyjicBZs/edit?usp=sharing)
--- a/cpp/tensorrt_llm/batch_manager/aarch64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/batch_manager/aarch64-linux-gnu/version.txt
@ -1,2 +1,2 @@
 9f9942768fd5b0cf5ed19860ad539dc9 libtensorrt_llm_ucx_wrapper.so
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/libtensorrt_llm_batch_manager_static.a
+++ b/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/libtensorrt_llm_batch_manager_static.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e015c5cab637202b76f6ccd2d59b1427dc739f10321996a003230ba32814c08b
+oid sha256:ee2a324ae76a843823d1d82686bb495d097367e1c3a41aa9596fd0d2ba3fadae
 size 8408224
--- a/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/libtensorrt_llm_batch_manager_static.pre_cxx11.a
+++ b/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/libtensorrt_llm_batch_manager_static.pre_cxx11.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1205f8fe60cc657645441c14c304888ad7cd68bc4cd1fabd10931a719560a42a
+oid sha256:4fcd95de792c72a38d4c1e76a4e714a2c69ffc25f03172075efb432e40ec29cd
 size 8374456
--- a/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/batch_manager/x86_64-linux-gnu/version.txt
@ -1,2 +1,2 @@
 e383212a40dca932c7b77bf4544dab80 libtensorrt_llm_ucx_wrapper.so
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/executor/aarch64-linux-gnu/libtensorrt_llm_executor_static.a
+++ b/cpp/tensorrt_llm/executor/aarch64-linux-gnu/libtensorrt_llm_executor_static.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2fbf80d02c115b9eeb2c18d24e3cb55f0c5404eba563591abeab7d223518df6
+oid sha256:6fd8ea50100bbbdc9d1d52d4b7e9a82f01583884eeb4d2703d537b6785c63ea7
 size 3102764
--- a/cpp/tensorrt_llm/executor/aarch64-linux-gnu/libtensorrt_llm_executor_static.pre_cxx11.a
+++ b/cpp/tensorrt_llm/executor/aarch64-linux-gnu/libtensorrt_llm_executor_static.pre_cxx11.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27f9acc83aa72979834cc8c216cf06e6ab4e9b10a7d1c9928bac3721fef037bd
+oid sha256:cd30000142d1256991fa27644d86dcd12a4c017eab9345a88ac705914aba8d11
 size 3145744
--- a/cpp/tensorrt_llm/executor/aarch64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/executor/aarch64-linux-gnu/version.txt
@ -1,3 +1,3 @@
-61ab1a6d4c62ee2a648f6daa5083c4de libtensorrt_llm_executor_static.a
-2f2bc67944c45ce0965704da43c9b1c4 libtensorrt_llm_executor_static.pre_cxx11.a
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+1146671822817c690387dc77d775b8c7 libtensorrt_llm_executor_static.a
+8f7cb0047a0c2690497a97911a60ed6d libtensorrt_llm_executor_static.pre_cxx11.a
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/executor/x86_64-linux-gnu/libtensorrt_llm_executor_static.a
+++ b/cpp/tensorrt_llm/executor/x86_64-linux-gnu/libtensorrt_llm_executor_static.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae603dd0c585a7ee601fb6816ac2cdde674d5c49b96c7dce88de2bc67ea727bc
+oid sha256:22951d2bb0e5da2a1eb20ae0eb74690ddd57e7f1dd9545762eed1e0f468dd4a5
 size 3457520
--- a/cpp/tensorrt_llm/executor/x86_64-linux-gnu/libtensorrt_llm_executor_static.pre_cxx11.a
+++ b/cpp/tensorrt_llm/executor/x86_64-linux-gnu/libtensorrt_llm_executor_static.pre_cxx11.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7adc486890442df336e145b0ccf982bc733f1a9cc8116f7ce56f1769cf7b1154
+oid sha256:0c13a28fc903da20aad74aeb1c3d04a3b1bf91421fdbe85a16b3552b3e7e431b
 size 3448406
--- a/cpp/tensorrt_llm/executor/x86_64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/executor/x86_64-linux-gnu/version.txt
@ -1,3 +1,3 @@
-e5da8cc2936606dfb49f4417d6961060 libtensorrt_llm_executor_static.a
-ad5dfb89c2d719d99d67346828e92e25 libtensorrt_llm_executor_static.pre_cxx11.a
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+34a5173ddebafd3f1621af2717a92f54 libtensorrt_llm_executor_static.a
+34eacc123dc995815fbd1e68ec98f78b libtensorrt_llm_executor_static.pre_cxx11.a
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/aarch64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/aarch64-linux-gnu/version.txt
@ -1,2 +1,2 @@
 f3143205203b038b9dca6dd32cf02f59 libtensorrt_llm_nvrtc_wrapper.so
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/x86_64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/nvrtcWrapper/x86_64-linux-gnu/version.txt
@ -1,2 +1,2 @@
 770ca93818f3f04837a67353e3f71fbc libtensorrt_llm_nvrtc_wrapper.so
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt
@ -1,3 +1,3 @@
 6bf0ba4e9b8b1152a21316243d30bec6 libtensorrt_llm_internal_cutlass_kernels_static.a
 96f8a359c84a78ba415f4d98ef1c4e1d libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/libtensorrt_llm_internal_cutlass_kernels_static.a
+++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/libtensorrt_llm_internal_cutlass_kernels_static.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b0f621e74dd506e49acd027dc09e9d2a3a6e0117ca0af68254841c02fb9c1dd
-size 68126454
+oid sha256:447838fe5c798098410a2cfed027aa38df847da2f725b9b8ccec57e73a1e194a
+size 68114502
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
+++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8448e105a5002148083bacc6c5066e017719ccd532c021f4c821df74fa0b763f
-size 68295728
+oid sha256:42b88e56cee5b9b81a66836add80ba79819afa24cbfd72140f4d62a244e3f960
+size 68295696
--- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt
+++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt
@ -1,3 +1,3 @@
-0b3322f5047dd4ee549211c2d15483c4 libtensorrt_llm_internal_cutlass_kernels_static.a
-502d4901fad6e648b8858051017c4cf2 libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
-d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
+4de75ffa1ff225422ba27f367175448f libtensorrt_llm_internal_cutlass_kernels_static.a
+e91d6c762f26c0b158eba8f376914e6e libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
+edf502396e4443f284a5fae6044402478cf457c1 commit
--- a/docs/source/release-notes.md
+++ b/docs/source/release-notes.md
@ -5,6 +5,12 @@
 All published functionality in the Release Notes has been fully tested and verified with known limitations documented. To share feedback about this release, access our [NVIDIA Developer Forum](https://forums.developer.nvidia.com/).


+## TensorRT-LLM Release 0.18.2
+
+### Key Features and Enhancements
+  - This update addresses known security issues. For the latest NVIDIA Vulnerability Disclosure Information, visit https://www.nvidia.com/en-us/security/.
+
+
 ## TensorRT-LLM Release 0.18.1

 ### Key Features and Enhancements
--- a/examples/baichuan/requirements.txt
+++ b/examples/baichuan/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.15.0
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/bloom/requirements.txt
+++ b/examples/bloom/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/chatglm/requirements.txt
+++ b/examples/chatglm/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 protobuf
--- a/examples/commandr/requirements.txt
+++ b/examples/commandr/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets==2.14.6
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/dbrx/requirements.txt
+++ b/examples/dbrx/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/deepseek_v1/requirements.txt
+++ b/examples/deepseek_v1/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.6
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/draft_target_model/requirements.txt
+++ b/examples/draft_target_model/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 sentencepiece>=0.1.99
--- a/examples/eagle/requirements.txt
+++ b/examples/eagle/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 SentencePiece~=0.1.99
--- a/examples/falcon/requirements.txt
+++ b/examples/falcon/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 transformers>=4.31.0
 datasets~=2.14.5
 evaluate~=0.4.1
--- a/examples/gemma/requirements.txt
+++ b/examples/gemma/requirements.txt
@ -2,7 +2,7 @@
 # WAR the new posting of "nvidia-cudnn-cu12~=9.0".
 # "jax[cuda12_pip]~=0.4.19" specifies "nvidia-cudnn-cu12>=8.9" but actually requires "nvidia-cudnn-cu12~=8.9".
 nvidia-cudnn-cu12~=8.9; platform_machine == "x86_64"
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 flax~=0.8.0
 # jax[cuda12_pip]~=0.4.19; platform_system != "Windows"
 jax~=0.4.19; platform_system == "Windows"
--- a/examples/gpt/requirements.txt
+++ b/examples/gpt/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/gptj/requirements.txt
+++ b/examples/gptj/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/gptneox/requirements.txt
+++ b/examples/gptneox/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 evaluate~=0.4.1
--- a/examples/grok/requirements.txt
+++ b/examples/grok/requirements.txt
@ -1,5 +1,5 @@
 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets==2.14.6
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/internlm/requirements.txt
+++ b/examples/internlm/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets==2.14.5
 rouge_score~=0.1.2
 sentencepiece>=0.1.99
--- a/examples/jais/requirements.txt
+++ b/examples/jais/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/llama/requirements.txt
+++ b/examples/llama/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 transformers>=4.43.0
 datasets==2.14.6
 evaluate~=0.4.1
--- a/examples/lookahead/requirements.txt
+++ b/examples/lookahead/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 sentencepiece>=0.1.99
--- a/examples/mamba/requirements.txt
+++ b/examples/mamba/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 transformers>=4.39.0
 datasets~=2.14.5
 evaluate
--- a/examples/medusa/requirements.txt
+++ b/examples/medusa/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 sentencepiece>=0.1.99
--- a/examples/mixtral/requirements.txt
+++ b/examples/mixtral/requirements.txt
@ -1,3 +1,3 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 transformers==4.38.2
 accelerate==0.25.0
--- a/examples/mpt/requirements.txt
+++ b/examples/mpt/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/nemotron/requirements.txt
+++ b/examples/nemotron/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 nemo-toolkit[all]==2.0.0rc1
 megatron-core==0.8.0
 datasets~=2.14.5
--- a/examples/opt/requirements.txt
+++ b/examples/opt/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/phi/requirements.txt
+++ b/examples/phi/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/prompt_lookup/requirements.txt
+++ b/examples/prompt_lookup/requirements.txt
@ -1,5 +1,5 @@
 --extra-index-url https://pypi.nvidia.com
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 sentencepiece~=0.1.99
--- a/examples/quantization/requirements.txt
+++ b/examples/quantization/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets>=2.14.4
 nemo-toolkit[all]==2.0.0rc1
 rouge_score~=0.1.2
--- a/examples/qwen/requirements.txt
+++ b/examples/qwen/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.16.0
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/qwenvl/requirements.txt
+++ b/examples/qwenvl/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.16.0
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/recurrentgemma/requirements.txt
+++ b/examples/recurrentgemma/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 git+https://github.com/google-deepmind/recurrentgemma.git@8a32e365
 flax>=0.8.2
 jax~=0.4.23
--- a/examples/redrafter/requirements.txt
+++ b/examples/redrafter/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.14.5
 rouge_score~=0.1.2
 sentencepiece>=0.1.99
--- a/examples/skywork/requirements.txt
+++ b/examples/skywork/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets~=2.16.1
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/smaug/requirements.txt
+++ b/examples/smaug/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 datasets==2.14.6
 evaluate~=0.4.1
 rouge_score~=0.1.2
--- a/examples/whisper/requirements.txt
+++ b/examples/whisper/requirements.txt
@ -1,4 +1,4 @@
-tensorrt_llm==0.18.1
+tensorrt_llm==0.18.2
 tiktoken
 datasets
 kaldialign
--- a/tensorrt_llm/executor.py
+++ b/tensorrt_llm/executor.py
@ -4,6 +4,8 @@ import concurrent.futures
 import copy
 import datetime
 import faulthandler
+import hashlib
+import hmac
 import io
 import json
 import multiprocessing
@ -21,7 +23,7 @@ from multiprocessing.shared_memory import SharedMemory
 from pathlib import Path
 from queue import Empty, Queue
 from typing import (Any, Callable, Dict, Generator, List, Literal, NamedTuple,
-                    Optional, Tuple, Union)
+                    Optional, Union)
 from weakref import WeakMethod

 import numpy as np
@ -1220,14 +1222,20 @@ class ExecutorBindingsWorker(GenerationExecutor):
 class ZeroMqQueue:
    ''' A Queue-like container for IPC using ZeroMQ. '''

-    def __init__(self, address: Optional[str] = None, *, is_server: bool):
+    def __init__(self,
+                 address: Optional[tuple[str, Optional[bytes]]] = None,
+                 *,
+                 is_server: bool,
+                 use_hmac_encryption: bool = True):
        '''
        Parameters:
-            address (Tuple[str, str], optional): The address (tcp-ip_port, authkey) for the IPC. Defaults to None.
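+            address (tuple[str, Optional[bytes]], optional): The (endpoint, hmac_auth_key) pair for the IPC. Defaults to None, in which case the endpoint is "tcp://127.0.0.1:*" and no key is supplied. When use_hmac_encryption is True, a server must not be given a key (it generates its own) and a client must be given one; when it is False, the key must be None and messages are sent unsigned.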
            is_server (bool): Whether the current process is the server or the client.
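+            use_hmac_encryption (bool): Whether to append an HMAC-SHA256 signature to pickled messages and verify it on receipt. Despite the name, this authenticates the payload; it does not encrypt it. Defaults to True.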
        '''

-        self.address = address or "tcp://127.0.0.1:*"
+        self.address_endpoint = address[
+            0] if address is not None else "tcp://127.0.0.1:*"
        self.is_server = is_server
        self.context = zmq.Context()
        self.poller = None
@ -1236,11 +1244,35 @@ class ZeroMqQueue:
        self._setup_done = False

        self.socket = self.context.socket(zmq.PAIR)
+
+        # HMAC signing setup (authenticates messages; does not encrypt them)
+        self.hmac_key = address[1] if address is not None else None
+        self.use_hmac_encryption = use_hmac_encryption
+
+        # Check HMAC key condition
+        if self.use_hmac_encryption and self.is_server and self.hmac_key is not None:
+            raise ValueError(
+                "Server should not receive HMAC key when encryption is enabled")
+        elif self.use_hmac_encryption and not self.is_server and self.hmac_key is None:
+            raise ValueError(
+                "Client must receive HMAC key when encryption is enabled")
+        elif not self.use_hmac_encryption and self.hmac_key is not None:
+            raise ValueError(
+                "Server and client should not receive HMAC key when encryption is disabled"
+            )
+
        if self.is_server:
            self.socket.bind(
-                self.address
+                self.address_endpoint
            )  # Binds to the address and occupy a port immediately
-            self.address = self.socket.getsockopt(zmq.LAST_ENDPOINT).decode()
+            self.address_endpoint = self.socket.getsockopt(
+                zmq.LAST_ENDPOINT).decode()
+
+            if self.use_hmac_encryption:
+                # Generate a random 32-byte HMAC key for signing pickled data
+                self.hmac_key = os.urandom(32)
+
+            self.address = (self.address_endpoint, self.hmac_key)

    def setup_lazily(self):
        if self._setup_done:
@ -1248,7 +1280,7 @@ class ZeroMqQueue:
        self._setup_done = True

        if not self.is_server:
-            self.socket.connect(self.address)
+            self.socket.connect(self.address_endpoint)
        self.poller = zmq.Poller()
        self.poller.register(self.socket, zmq.POLLIN)

@ -1276,14 +1308,34 @@ class ZeroMqQueue:
                                              is_final=obj.is_final,
                                              error=obj.error)

-        message = pickle.dumps(obj)  # nosec B301
-        self.socket.send(message)
+        if self.use_hmac_encryption:
+            # Send pickled data with HMAC appended
+            data = pickle.dumps(obj)  # nosec B301
+            signed_data = self._sign_data(data)
+            self.socket.send(signed_data)
+        else:
+            # Send data without HMAC
+            self.socket.send_pyobj(obj)

    def get(self) -> Any:
        self.setup_lazily()

-        message = self.socket.recv()
-        obj = pickle.loads(message)  # nosec B301
+        if self.use_hmac_encryption:
+            # Receive signed data with HMAC
+            signed_data = self.socket.recv()
+
+            # Split data and HMAC
+            data = signed_data[:-32]
+            actual_hmac = signed_data[-32:]
+
+            # Verify HMAC
+            if not self._verify_hmac(data, actual_hmac):
+                raise RuntimeError("HMAC verification failed")
+
+            obj = pickle.loads(data)  # nosec B301
+        else:
+            # Receive data without HMAC
+            obj = self.socket.recv_pyobj()

        if isinstance(obj, GenerationExecutor.Response):
            tensors = self._load_tensors_from_shmm(obj.tensors)
@ -1355,6 +1407,17 @@ class ZeroMqQueue:
            cum_log_probs=tensors.cum_log_probs,
        )

+    def _verify_hmac(self, data: bytes, actual_hmac: bytes) -> bool:
+        """Verify the HMAC of received pickle data."""
+        expected_hmac = hmac.new(self.hmac_key, data, hashlib.sha256).digest()
+        return hmac.compare_digest(expected_hmac, actual_hmac)
+
+    def _sign_data(self, data_before_encoding: bytes) -> bytes:
+        """Return the data with its HMAC-SHA256 signature appended."""
+        hmac_signature = hmac.new(self.hmac_key, data_before_encoding,
+                                  hashlib.sha256).digest()
+        return data_before_encoding + hmac_signature
+
    def __del__(self):
        self.close()

@ -1366,7 +1429,7 @@ class FusedIpcQueue:
    ''' A Queue-like container for IPC with optional message batched. '''

    def __init__(self,
-                 address: Optional[str] = None,
+                 address: Optional[tuple[str, Optional[bytes]]] = None,
                 *,
                 is_server: bool,
                 fuse_message=False,
@ -1444,7 +1507,7 @@ class FusedIpcQueue:
        return obj

    @property
-    def address(self) -> Tuple[str, int, bytes]:
+    def address(self) -> tuple[str, Optional[bytes]]:
        return self.queue.address

    def __del__(self):
@ -1515,10 +1578,10 @@ class ExecutorBindingsProxy(GenerationExecutor):
    @staticmethod
    def workers_main(
        engine: Union[Path, Engine],
-        request_queue_addr: Tuple[str, int, bytes],
-        request_error_queue_addr: Tuple[str, int, bytes],
-        result_queue_addr: Tuple[str, int, bytes],
-        stats_queue_addr: Tuple[str, int, bytes],
+        request_queue_addr: tuple[str, Optional[bytes]],
+        request_error_queue_addr: tuple[str, Optional[bytes]],
+        result_queue_addr: tuple[str, Optional[bytes]],
+        stats_queue_addr: tuple[str, Optional[bytes]],
        executor_config: Optional[tllm.ExecutorConfig] = None,
        logits_post_processor_map: Optional[Dict[str, Callable]] = None,
        worker_cls: type = ExecutorBindingsWorker,
--- a/tensorrt_llm/version.py
+++ b/tensorrt_llm/version.py
@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.18.1"
+__version__ = "0.18.2"