TensorRT-LLMs/requirements.txt

--extra-index-url https://download.pytorch.org/whl/cu128
-c constraints.txt
accelerate>=1.7.0
build
colored
# cuda-python>=12,<13  # <For CUDA 12.9>
cuda-python>=12
diffusers>=0.27.0
lark
mpi4py
numpy<2
onnx>=1.18.0
onnx_graphsurgeon>=0.5.2
openai
polygraphy
psutil
# nvidia-ml-py>=12,<13  # <For CUDA 12.9>
nvidia-ml-py>=12
# pynvml is only a wrapper package; it is listed because nvidia-modelopt requires pynvml.
pynvml==12.0.0
pulp
pandas
h5py==3.12.1
StrEnum
sentencepiece>=0.1.99
# tensorrt>=10.11.0,<=10.13.0  # <For CUDA 12.9>
tensorrt~=10.13.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses 2.8.0a0.
# torch>=2.7.1,<=2.8.0a0  # <For CUDA 12.9>
torch>=2.8.0a0,<=2.8.0
torchvision
nvidia-modelopt[torch]~=0.33.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses NCCL 2.27.7.
nvidia-nccl-cu12
# nvidia-cuda-nvrtc-cu12  # <For CUDA 12.9>
nvidia-cuda-nvrtc
transformers==4.55.0
prometheus_client
prometheus_fastapi_instrumentator
pydantic>=2.9.1
pydantic-settings[yaml]
omegaconf
pillow==10.3.0
wheel<=0.45.1
optimum
# evaluate requires datasets>=2.0.0, which lets the resolver pick datasets>3.1.0; those releases are not stable (https://github.com/huggingface/datasets/issues/7467), so datasets is pinned to 3.1.0.
datasets==3.1.0
evaluate
mpmath>=1.3.0
click
click_option_group
aenum
pyzmq
fastapi==0.115.4
uvicorn
setuptools<80
ordered-set
peft
einops
flashinfer-python @ git+https://github.com/VALLIS-NERIA/flashinfer.git@c4817ae34a69d8edf5b1922b62cfb0a9b4eb0c42
opencv-python-headless
xgrammar==0.1.21
llguidance==0.7.29
jsonschema
backoff
nvtx
matplotlib # FIXME: this is added to make nvtx happy
meson
ninja
etcd3
blake3
soundfile
triton>=3.3.1,<=3.4.0; platform_machine == "x86_64"
tiktoken
blobfile