mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
TensorRT-LLM v0.18.2 release (#3611)
This commit is contained in:
parent
62f3c954b9
commit
5aec7af45f
@ -9,7 +9,7 @@ TensorRT-LLM
|
||||
[](https://www.python.org/downloads/release/python-31012/)
|
||||
[](https://developer.nvidia.com/cuda-downloads)
|
||||
[](https://developer.nvidia.com/tensorrt)
|
||||
[](./tensorrt_llm/version.py)
|
||||
[](./tensorrt_llm/version.py)
|
||||
[](./LICENSE)
|
||||
|
||||
[Architecture](./docs/source/architecture/overview.md) | [Performance](./docs/source/performance/perf-overview.md) | [Examples](./examples/) | [Documentation](./docs/source/) | [Roadmap](https://docs.google.com/presentation/d/1gycPmtdh7uUcH6laOvW65Dbp9F1McUkGDIcAyjicBZs/edit?usp=sharing)
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
9f9942768fd5b0cf5ed19860ad539dc9 libtensorrt_llm_ucx_wrapper.so
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e015c5cab637202b76f6ccd2d59b1427dc739f10321996a003230ba32814c08b
|
||||
oid sha256:ee2a324ae76a843823d1d82686bb495d097367e1c3a41aa9596fd0d2ba3fadae
|
||||
size 8408224
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1205f8fe60cc657645441c14c304888ad7cd68bc4cd1fabd10931a719560a42a
|
||||
oid sha256:4fcd95de792c72a38d4c1e76a4e714a2c69ffc25f03172075efb432e40ec29cd
|
||||
size 8374456
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
e383212a40dca932c7b77bf4544dab80 libtensorrt_llm_ucx_wrapper.so
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e2fbf80d02c115b9eeb2c18d24e3cb55f0c5404eba563591abeab7d223518df6
|
||||
oid sha256:6fd8ea50100bbbdc9d1d52d4b7e9a82f01583884eeb4d2703d537b6785c63ea7
|
||||
size 3102764
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:27f9acc83aa72979834cc8c216cf06e6ab4e9b10a7d1c9928bac3721fef037bd
|
||||
oid sha256:cd30000142d1256991fa27644d86dcd12a4c017eab9345a88ac705914aba8d11
|
||||
size 3145744
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
61ab1a6d4c62ee2a648f6daa5083c4de libtensorrt_llm_executor_static.a
|
||||
2f2bc67944c45ce0965704da43c9b1c4 libtensorrt_llm_executor_static.pre_cxx11.a
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
1146671822817c690387dc77d775b8c7 libtensorrt_llm_executor_static.a
|
||||
8f7cb0047a0c2690497a97911a60ed6d libtensorrt_llm_executor_static.pre_cxx11.a
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ae603dd0c585a7ee601fb6816ac2cdde674d5c49b96c7dce88de2bc67ea727bc
|
||||
oid sha256:22951d2bb0e5da2a1eb20ae0eb74690ddd57e7f1dd9545762eed1e0f468dd4a5
|
||||
size 3457520
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7adc486890442df336e145b0ccf982bc733f1a9cc8116f7ce56f1769cf7b1154
|
||||
oid sha256:0c13a28fc903da20aad74aeb1c3d04a3b1bf91421fdbe85a16b3552b3e7e431b
|
||||
size 3448406
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
e5da8cc2936606dfb49f4417d6961060 libtensorrt_llm_executor_static.a
|
||||
ad5dfb89c2d719d99d67346828e92e25 libtensorrt_llm_executor_static.pre_cxx11.a
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
34a5173ddebafd3f1621af2717a92f54 libtensorrt_llm_executor_static.a
|
||||
34eacc123dc995815fbd1e68ec98f78b libtensorrt_llm_executor_static.pre_cxx11.a
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,2 +1,2 @@
|
||||
f3143205203b038b9dca6dd32cf02f59 libtensorrt_llm_nvrtc_wrapper.so
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,2 +1,2 @@
|
||||
770ca93818f3f04837a67353e3f71fbc libtensorrt_llm_nvrtc_wrapper.so
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,3 +1,3 @@
|
||||
6bf0ba4e9b8b1152a21316243d30bec6 libtensorrt_llm_internal_cutlass_kernels_static.a
|
||||
96f8a359c84a78ba415f4d98ef1c4e1d libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0b0f621e74dd506e49acd027dc09e9d2a3a6e0117ca0af68254841c02fb9c1dd
|
||||
size 68126454
|
||||
oid sha256:447838fe5c798098410a2cfed027aa38df847da2f725b9b8ccec57e73a1e194a
|
||||
size 68114502
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8448e105a5002148083bacc6c5066e017719ccd532c021f4c821df74fa0b763f
|
||||
size 68295728
|
||||
oid sha256:42b88e56cee5b9b81a66836add80ba79819afa24cbfd72140f4d62a244e3f960
|
||||
size 68295696
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
0b3322f5047dd4ee549211c2d15483c4 libtensorrt_llm_internal_cutlass_kernels_static.a
|
||||
502d4901fad6e648b8858051017c4cf2 libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
|
||||
d2efc6043262c896e262e8d8b97055af0f1f8b47 commit
|
||||
4de75ffa1ff225422ba27f367175448f libtensorrt_llm_internal_cutlass_kernels_static.a
|
||||
e91d6c762f26c0b158eba8f376914e6e libtensorrt_llm_internal_cutlass_kernels_static.pre_cxx11.a
|
||||
edf502396e4443f284a5fae6044402478cf457c1 commit
|
||||
@ -5,6 +5,12 @@
|
||||
All published functionality in the Release Notes has been fully tested and verified with known limitations documented. To share feedback about this release, access our [NVIDIA Developer Forum](https://forums.developer.nvidia.com/).
|
||||
|
||||
|
||||
## TensorRT-LLM Release 0.18.2
|
||||
|
||||
### Key Features and Enhancements
|
||||
- This update addresses known security issues. For the latest NVIDIA Vulnerability Disclosure Information, visit https://www.nvidia.com/en-us/security/.
|
||||
|
||||
|
||||
## TensorRT-LLM Release 0.18.1
|
||||
|
||||
### Key Features and Enhancements
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.15.0
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
protobuf
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets==2.14.6
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.6
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece>=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
SentencePiece~=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
transformers>=4.31.0
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Workaround (WAR) for the new posting of "nvidia-cudnn-cu12~=9.0".
|
||||
# "jax[cuda12_pip]~=0.4.19" specifies "nvidia-cudnn-cu12>=8.9" but actually requires "nvidia-cudnn-cu12~=8.9".
|
||||
nvidia-cudnn-cu12~=8.9; platform_machine == "x86_64"
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
flax~=0.8.0
|
||||
# jax[cuda12_pip]~=0.4.19; platform_system != "Windows"
|
||||
jax~=0.4.19; platform_system == "Windows"
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
evaluate~=0.4.1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets==2.14.6
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets==2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece>=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
transformers>=4.43.0
|
||||
datasets==2.14.6
|
||||
evaluate~=0.4.1
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece>=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
transformers>=4.39.0
|
||||
datasets~=2.14.5
|
||||
evaluate
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece>=0.1.99
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
transformers==4.38.2
|
||||
accelerate==0.25.0
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
nemo-toolkit[all]==2.0.0rc1
|
||||
megatron-core==0.8.0
|
||||
datasets~=2.14.5
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
--extra-index-url https://pypi.nvidia.com
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece~=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets>=2.14.4
|
||||
nemo-toolkit[all]==2.0.0rc1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.16.0
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.16.0
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
git+https://github.com/google-deepmind/recurrentgemma.git@8a32e365
|
||||
flax>=0.8.2
|
||||
jax~=0.4.23
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.14.5
|
||||
rouge_score~=0.1.2
|
||||
sentencepiece>=0.1.99
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets~=2.16.1
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
datasets==2.14.6
|
||||
evaluate~=0.4.1
|
||||
rouge_score~=0.1.2
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
tensorrt_llm==0.18.1
|
||||
tensorrt_llm==0.18.2
|
||||
tiktoken
|
||||
datasets
|
||||
kaldialign
|
||||
|
||||
@ -4,6 +4,8 @@ import concurrent.futures
|
||||
import copy
|
||||
import datetime
|
||||
import faulthandler
|
||||
import hashlib
|
||||
import hmac
|
||||
import io
|
||||
import json
|
||||
import multiprocessing
|
||||
@ -21,7 +23,7 @@ from multiprocessing.shared_memory import SharedMemory
|
||||
from pathlib import Path
|
||||
from queue import Empty, Queue
|
||||
from typing import (Any, Callable, Dict, Generator, List, Literal, NamedTuple,
|
||||
Optional, Tuple, Union)
|
||||
Optional, Union)
|
||||
from weakref import WeakMethod
|
||||
|
||||
import numpy as np
|
||||
@ -1220,14 +1222,20 @@ class ExecutorBindingsWorker(GenerationExecutor):
|
||||
class ZeroMqQueue:
|
||||
''' A Queue-like container for IPC using ZeroMQ. '''
|
||||
|
||||
def __init__(self, address: Optional[str] = None, *, is_server: bool):
|
||||
def __init__(self,
|
||||
address: Optional[tuple[str, Optional[bytes]]] = None,
|
||||
*,
|
||||
is_server: bool,
|
||||
use_hmac_encryption: bool = True):
|
||||
'''
|
||||
Parameters:
|
||||
address (Tuple[str, str], optional): The address (tcp-ip_port, authkey) for the IPC. Defaults to None.
|
||||
address (tuple[str, Optional[bytes]], optional): The (endpoint, hmac_auth_key) pair for the IPC, where endpoint is a ZeroMQ address such as "tcp://127.0.0.1:*". Defaults to None. If hmac_auth_key is None and use_hmac_encryption is False, messages are sent without an HMAC signature.
|
||||
is_server (bool): Whether the current process is the server or the client.
|
||||
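use_hmac_encryption (bool): Whether to append an HMAC-SHA256 signature to pickled data and verify it on receipt. Despite the name, this authenticates messages; it does not encrypt them. Defaults to True.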
|
||||
'''
|
||||
|
||||
self.address = address or "tcp://127.0.0.1:*"
|
||||
self.address_endpoint = address[
|
||||
0] if address is not None else "tcp://127.0.0.1:*"
|
||||
self.is_server = is_server
|
||||
self.context = zmq.Context()
|
||||
self.poller = None
|
||||
@ -1236,11 +1244,35 @@ class ZeroMqQueue:
|
||||
self._setup_done = False
|
||||
|
||||
self.socket = self.context.socket(zmq.PAIR)
|
||||
|
||||
# HMAC encryption setup
|
||||
self.hmac_key = address[1] if address is not None else None
|
||||
self.use_hmac_encryption = use_hmac_encryption
|
||||
|
||||
# Check HMAC key condition
|
||||
if self.use_hmac_encryption and self.is_server and self.hmac_key is not None:
|
||||
raise ValueError(
|
||||
"Server should not receive HMAC key when encryption is enabled")
|
||||
elif self.use_hmac_encryption and not self.is_server and self.hmac_key is None:
|
||||
raise ValueError(
|
||||
"Client must receive HMAC key when encryption is enabled")
|
||||
elif not self.use_hmac_encryption and self.hmac_key is not None:
|
||||
raise ValueError(
|
||||
"Server and client should not receive HMAC key when encryption is disabled"
|
||||
)
|
||||
|
||||
if self.is_server:
|
||||
self.socket.bind(
|
||||
self.address
|
||||
self.address_endpoint
|
||||
) # Binds to the address and occupy a port immediately
|
||||
self.address = self.socket.getsockopt(zmq.LAST_ENDPOINT).decode()
|
||||
self.address_endpoint = self.socket.getsockopt(
|
||||
zmq.LAST_ENDPOINT).decode()
|
||||
|
||||
if self.use_hmac_encryption:
|
||||
# Initialize HMAC key for pickle encryption
|
||||
self.hmac_key = os.urandom(32)
|
||||
|
||||
self.address = (self.address_endpoint, self.hmac_key)
|
||||
|
||||
def setup_lazily(self):
|
||||
if self._setup_done:
|
||||
@ -1248,7 +1280,7 @@ class ZeroMqQueue:
|
||||
self._setup_done = True
|
||||
|
||||
if not self.is_server:
|
||||
self.socket.connect(self.address)
|
||||
self.socket.connect(self.address_endpoint)
|
||||
self.poller = zmq.Poller()
|
||||
self.poller.register(self.socket, zmq.POLLIN)
|
||||
|
||||
@ -1276,14 +1308,34 @@ class ZeroMqQueue:
|
||||
is_final=obj.is_final,
|
||||
error=obj.error)
|
||||
|
||||
message = pickle.dumps(obj) # nosec B301
|
||||
self.socket.send(message)
|
||||
if self.use_hmac_encryption:
|
||||
# Send pickled data with HMAC appended
|
||||
data = pickle.dumps(obj) # nosec B301
|
||||
signed_data = self._sign_data(data)
|
||||
self.socket.send(signed_data)
|
||||
else:
|
||||
# Send data without HMAC
|
||||
self.socket.send_pyobj(obj)
|
||||
|
||||
def get(self) -> Any:
|
||||
self.setup_lazily()
|
||||
|
||||
message = self.socket.recv()
|
||||
obj = pickle.loads(message) # nosec B301
|
||||
if self.use_hmac_encryption:
|
||||
# Receive signed data with HMAC
|
||||
signed_data = self.socket.recv()
|
||||
|
||||
# Split data and HMAC
|
||||
data = signed_data[:-32]
|
||||
actual_hmac = signed_data[-32:]
|
||||
|
||||
# Verify HMAC
|
||||
if not self._verify_hmac(data, actual_hmac):
|
||||
raise RuntimeError("HMAC verification failed")
|
||||
|
||||
obj = pickle.loads(data) # nosec B301
|
||||
else:
|
||||
# Receive data without HMAC
|
||||
obj = self.socket.recv_pyobj()
|
||||
|
||||
if isinstance(obj, GenerationExecutor.Response):
|
||||
tensors = self._load_tensors_from_shmm(obj.tensors)
|
||||
@ -1355,6 +1407,17 @@ class ZeroMqQueue:
|
||||
cum_log_probs=tensors.cum_log_probs,
|
||||
)
|
||||
|
||||
def _verify_hmac(self, data: bytes, actual_hmac: bytes) -> bool:
|
||||
"""Verify the HMAC of received pickle data."""
|
||||
expected_hmac = hmac.new(self.hmac_key, data, hashlib.sha256).digest()
|
||||
return hmac.compare_digest(expected_hmac, actual_hmac)
|
||||
|
||||
def _sign_data(self, data_before_encoding: bytes) -> bytes:
|
||||
"""Generate HMAC for data."""
|
||||
hmac_signature = hmac.new(self.hmac_key, data_before_encoding,
|
||||
hashlib.sha256).digest()
|
||||
return data_before_encoding + hmac_signature
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
@ -1366,7 +1429,7 @@ class FusedIpcQueue:
|
||||
''' A Queue-like container for IPC with optional message batching. '''
|
||||
|
||||
def __init__(self,
|
||||
address: Optional[str] = None,
|
||||
address: Optional[tuple[str, Optional[bytes]]] = None,
|
||||
*,
|
||||
is_server: bool,
|
||||
fuse_message=False,
|
||||
@ -1444,7 +1507,7 @@ class FusedIpcQueue:
|
||||
return obj
|
||||
|
||||
@property
|
||||
def address(self) -> Tuple[str, int, bytes]:
|
||||
def address(self) -> tuple[str, Optional[bytes]]:
|
||||
return self.queue.address
|
||||
|
||||
def __del__(self):
|
||||
@ -1515,10 +1578,10 @@ class ExecutorBindingsProxy(GenerationExecutor):
|
||||
@staticmethod
|
||||
def workers_main(
|
||||
engine: Union[Path, Engine],
|
||||
request_queue_addr: Tuple[str, int, bytes],
|
||||
request_error_queue_addr: Tuple[str, int, bytes],
|
||||
result_queue_addr: Tuple[str, int, bytes],
|
||||
stats_queue_addr: Tuple[str, int, bytes],
|
||||
request_queue_addr: tuple[str, Optional[bytes]],
|
||||
request_error_queue_addr: tuple[str, Optional[bytes]],
|
||||
result_queue_addr: tuple[str, Optional[bytes]],
|
||||
stats_queue_addr: tuple[str, Optional[bytes]],
|
||||
executor_config: Optional[tllm.ExecutorConfig] = None,
|
||||
logits_post_processor_map: Optional[Dict[str, Callable]] = None,
|
||||
worker_cls: type = ExecutorBindingsWorker,
|
||||
|
||||
@ -12,4 +12,4 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
__version__ = "0.18.1"
|
||||
__version__ = "0.18.2"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user