mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)

infra: update DLFW 25.08 GA, triton 25.08 GA

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>

commit ee37589c8c (parent 9ad68de159)
@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
-ARG BASE_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/pytorch
-ARG TRITON_IMAGE=gitlab-master.nvidia.com:5005/dl/dgx/tritonserver
-ARG BASE_TAG=25.08-py3.32674667-devel
-ARG TRITON_BASE_TAG=25.08-py3.32978230
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
+ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
+ARG BASE_TAG=25.08-py3
+ARG TRITON_BASE_TAG=25.08-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
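The GA defaults above can still be overridden at build time. A minimal sketch, assuming the multi-stage Dockerfile lives at docker/Dockerfile.multi (the path is not shown in this diff):

# Build against the NGC GA images explicitly; these --build-arg values
# simply restate the new defaults introduced above.
docker buildx build \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/pytorch \
  --build-arg BASE_TAG=25.08-py3 \
  -f docker/Dockerfile.multi .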
@@ -79,6 +79,7 @@ endef
 %_build:
 	@echo "Building docker image: $(IMAGE_WITH_TAG)"
 	docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
+		--network=host \
 		--progress $(DOCKER_PROGRESS) \
 		$(if $(BASE_IMAGE), --build-arg BASE_IMAGE=$(BASE_IMAGE)) \
 		$(if $(BASE_TAG), --build-arg BASE_TAG=$(BASE_TAG)) \
@@ -191,16 +192,16 @@ jenkins-aarch64_%: STAGE = tritondevel
 jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_VERSION}),PY312,$(if $(findstring 3.10,${PYTHON_VERSION}),PY310,$(error Unknown PYTHON_VERSION specified)))
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
-jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
+jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel
-rockylinux8_%: BASE_IMAGE = nvidia/cuda
+rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
 
 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
-ubuntu22_%: BASE_IMAGE = nvidia/cuda
+ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
 ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
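Since `%_build` is an ordinary pattern rule and the `ubuntu22_%` / `rockylinux8_%` lines only attach target-specific variables, a build can be driven by target name alone. A hedged example (target vocabulary beyond what the hunks show is assumed):

# "ubuntu22_build" matches the ubuntu22_% variable block above
# (STAGE=tritondevel, BASE_IMAGE=nvcr.io/nvidia/cuda, BASE_TAG=13.0.0-devel-ubuntu22.04)
# and then executes the generic %_build recipe.
make ubuntu22_build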
@@ -4,8 +4,8 @@ set -ex
 
 # Use latest stable version from https://pypi.org/project/torch/#history
 # and closest to the version specified in
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
-TORCH_VERSION="2.7.1"
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
+TORCH_VERSION="2.8.0"
 SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 
 prepare_environment() {
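The `SYSTEM_ID` line extracts the distribution ID from /etc/os-release, stripping the quotes some distributions add:

# On Ubuntu, /etc/os-release contains ID=ubuntu:
grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"'
# -> ubuntu
# On Rocky Linux the field is quoted (ID="rocky"); tr removes the quotes -> rocky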
@@ -448,6 +448,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
         pipArgs = ""
     }
 
+    if (tarName.contains("_CU12")) {
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
+    }
     // install python package
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")
 
@@ -577,6 +580,8 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
+            "Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
+                pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
         ]
     } else {
         buildConfigs.remove("Build TRT-LLM LLVM") // TODO: Remove after LLVM is supported on AArch64
     }
 
     rtServer (
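The sed one-liner above implements the `<For CUDA 12.9>` marker convention used throughout this commit: each CUDA 12 pin sits commented out directly above its CUDA 13 counterpart, and the toggle uncomments the marker line (`s/^# //`), advances one line (`n`), and comments out the CUDA 13 default (`s/^/# /`). A standalone sketch of the behavior (hypothetical file path; sample lines taken from the requirements.txt hunk further below):

printf '%s\n' \
  '# tensorrt>=10.11.0,<=10.13.0 # <For CUDA 12.9>' \
  'tensorrt~=10.13.0' > /tmp/requirements-demo.txt
sed -i '/^# .*<For CUDA 12\.9>$/ {s/^# //; n; s/^/# /}' /tmp/requirements-demo.txt
cat /tmp/requirements-demo.txt
# Output:
#   tensorrt>=10.11.0,<=10.13.0 # <For CUDA 12.9>
#   # tensorrt~=10.13.0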
@@ -37,11 +37,16 @@ LLM_DOCKER_IMAGE = env.dockerImage
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312
 
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9="urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090"
 
+LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
+LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090"
 
+DLFW_IMAGE_12_9 = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3"
 
 // DLFW torch image
-DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.06-py3"
+DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.08-py3"
 
 //Ubuntu base image
 UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
@@ -769,6 +774,16 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
                     path: /vol/scratch1/scratch.svc_tensorrt_blossom
         """
     }
+    // TODO: remove this after GH200 driver upgrade
+    def hostnameMatch = ""
+    if (type == "gh200") {
+        hostnameMatch = """
+                  - key: "kubernetes.io/hostname"
+                    operator: In
+                    values:
+                      - "lego-cg1-qct-066.ipp3a2.colossus\""""
+    }
 
     def podConfig = [
         cloud: targetCould,
         namespace: "sw-tensorrt",
@@ -788,7 +803,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
                   - key: "tensorrt/affinity"
                     operator: NotIn
                     values:
-                      - "core"
+                      - "core"${hostnameMatch}
                 nodeSelector: ${selectors}
                 containers:
                   ${containerConfig}
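When `type == "gh200"`, the `${hostnameMatch}` interpolation appends a second match expression to the affinity block above; the rendered fragment (assembled verbatim from the strings in these hunks) reads:

- key: "tensorrt/affinity"
  operator: NotIn
  values:
    - "core"
- key: "kubernetes.io/hostname"
  operator: In
  values:
    - "lego-cg1-qct-066.ipp3a2.colossus"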
@@ -1354,7 +1369,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
         sh "cd ${llmSrc} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
     }
     if (stageName.contains("-CU12")) {
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i 's/-cu13/-cu12/g' requirements.txt && cat requirements.txt")
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
     }
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${llmSrc} && pip3 install --retries 1 -r requirements-dev.txt")
     if (!skipInstallWheel) {
@@ -1616,7 +1631,7 @@ def checkPipInstall(pipeline, wheel_path)
 }
 
 
-def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="", cpver="cp312")
+def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="", cpver="cp312", is_cu12=false)
 {
     sh "pwd && ls -alh"
     sh "env | sort"
@@ -1624,7 +1639,10 @@ def runLLMBuild(pipeline, cpu_arch, reinstall_dependencies=false, wheel_path="",
 
     trtllm_utils.checkoutSource(LLM_REPO, env.gitlabCommit, "tensorrt_llm", true, true)
     if (env.alternativeTRT) {
-        sh "cd ${LLM_ROOT} && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
+        sh "cd tensorrt_llm/ && sed -i 's#tensorrt~=.*\$#tensorrt#g' requirements.txt && cat requirements.txt"
     }
+    if (is_cu12) {
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd tensorrt_llm/ && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
+    }
 
     // Random sleep to avoid resource contention
@@ -2013,17 +2031,17 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
 
     // Python version and OS for sanity check
     x86SanityCheckConfigs = [
-        "PY312-DLFW": [
-            LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
+        "PY312-DLFW-CU12": [
+            LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9,
             "B200_PCIe",
             X86_64_TRIPLE,
             true,
             "dlfw/",
-            DLFW_IMAGE,
+            DLFW_IMAGE_12_9,
             false,
         ],
-        "PY310-UB2204": [
-            LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE,
+        "PY310-UB2204-CU12": [
+            LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9,
             "A10",
             X86_64_TRIPLE,
             true,
@@ -2031,8 +2049,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             UBUNTU_22_04_IMAGE,
             false,
         ],
-        "PY312-UB2404": [
-            LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
+        "PY312-UB2404-CU12": [
+            LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9,
             "RTX5090",
             X86_64_TRIPLE,
             true,
@@ -2043,7 +2061,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     ]
 
     aarch64SanityCheckConfigs = [
-        "PY312-UB2404": [
+        "PY312-UB2404-CU12": [
             LLM_SBSA_DOCKER_IMAGE_12_9,
             "GH200",
             AARCH64_TRIPLE,
@@ -2052,13 +2070,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             UBUNTU_24_04_IMAGE,
             true, // Extra PyTorch CUDA 12.8 install
         ],
-        "PY312-DLFW": [
+        "PY312-DLFW-CU12": [
             LLM_SBSA_DOCKER_IMAGE_12_9,
             "GH200",
             AARCH64_TRIPLE,
             false,
             "dlfw/",
-            DLFW_IMAGE,
+            DLFW_IMAGE_12_9,
             false,
         ],
     ]
@@ -2114,7 +2132,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             env = ["LD_LIBRARY_PATH+=:/usr/local/cuda/compat"]
         }
         withEnv(env) {
-            wheelName = runLLMBuild(pipeline, cpu_arch, values[3], wheelPath, cpver)
+            wheelName = runLLMBuild(pipeline, cpu_arch, values[3], wheelPath, cpver, key.contains("CU12"))
         }
     }
 
@@ -2139,7 +2157,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     echo "###### Prerequisites Start ######"
     echoNodeAndGpuInfo(pipeline, toStageName(values[1], key))
     // Clean up the pip constraint file from the base NGC PyTorch image.
-    if (values[5] == DLFW_IMAGE) {
+    if (values[5] == DLFW_IMAGE || values[5] == DLFW_IMAGE_12_9) {
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "[ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true")
     }
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
@@ -2148,21 +2166,30 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install requests")
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 uninstall -y tensorrt")
-    if (values[5] != DLFW_IMAGE) {
+    if (values[5] != DLFW_IMAGE && values[5] != DLFW_IMAGE_12_9) {
         def ubuntu_version = key.contains("UB2404") ? "ubuntu2404" : "ubuntu2204"
         def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
         trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
-
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install 'cuda-python>=12,<13' 'nvidia-ml-py>=12,<13'")
+        if (key.contains("CU12")) {
+            trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
+        } else {
+            trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-13-0")
+        }
     }
-    trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i 's/-cu13/-cu12/g' ${LLM_ROOT}/requirements.txt")
+    if (key.contains("CU12")) {
+        trtllm_utils.llmExecStepWithRetry(pipeline, script: "sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' ${LLM_ROOT}/requirements.txt")
+        sh "cat ${LLM_ROOT}/requirements.txt"
+    }
     // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments
     if (values[6]) {
         echo "###### Extra PyTorch CUDA 12.8 install Start ######"
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128")
+        if (key.contains("CU12")) {
+            trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128")
+        } else {
+            trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128")
+        }
     }
 
     def libEnv = []
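Concretely, for a non-CU12 Ubuntu 24.04 x86_64 stage the interpolated steps reduce to:

# ubuntu_version="ubuntu2404", platform="x86_64", CU13 branch
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
apt-get update
apt-get -y install cuda-toolkit-13-0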
@@ -2181,9 +2208,10 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
             }
         }
         echo "###### Run LLMAPI tests Start ######"
-        def config = VANILLA_CONFIG_CU12
+
+        def config = key.contains("CU12") ? VANILLA_CONFIG_CU12 : VANILLA_CONFIG
         if (cpu_arch == AARCH64_TRIPLE) {
-            config = LINUX_AARCH64_CONFIG_CU12
+            config = key.contains("CU12") ? LINUX_AARCH64_CONFIG_CU12 : LINUX_AARCH64_CONFIG
         }
         withEnv(libEnv) {
             sh "env | sort"
@@ -15,9 +15,7 @@ LLM_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.
 LLM_SBSA_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202508051130-6090
 LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-x86_64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
-LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-aarch64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
-#LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py310-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
-#LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py312-trt10.13.2.6-skip-tritondevel-202508151730-pre-test
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202508051130-6090
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202508051130-6090
+LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-x86_64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508261630-9671
+LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.08-py3-aarch64-ubuntu24.04-trt10.13.2.6-skip-tritondevel-202508261630-9671
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py310-trt10.13.2.6-skip-tritondevel-202508261630-9671
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.0-devel-rocky8-x86_64-rocky8-py312-trt10.13.2.6-skip-tritondevel-202508261630-9671
@@ -3,6 +3,7 @@
 accelerate>=1.7.0
 build
 colored
+# cuda-python>=12,<13 # <For CUDA 12.9>
 cuda-python>=12
 diffusers>=0.27.0
 lark
@@ -13,6 +14,7 @@ onnx_graphsurgeon>=0.5.2
 openai
 polygraphy
 psutil
+# nvidia-ml-py>=12,<13 # <For CUDA 12.9>
 nvidia-ml-py>=12
 # Just a wrapper since nvidia-modelopt requires pynvml
 pynvml==12.0.0
@@ -21,13 +23,17 @@ pandas
 h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
-tensorrt
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
-torch>=2.7.1,<=2.8.0a0
+# tensorrt>=10.11.0,<=10.13.0 # <For CUDA 12.9>
+tensorrt~=10.13.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses 2.8.0a0.
+# torch>=2.7.1,<=2.8.0a0 # <For CUDA 12.9>
+torch>=2.8.0a0,<=2.8.0
 torchvision
 nvidia-modelopt[torch]~=0.33.0
-nvidia-nccl-cu13
-nvidia-cuda-nvrtc-cu13
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08 uses 2.27.7
+nvidia-nccl-cu12
+# nvidia-cuda-nvrtc-cu12 # <For CUDA 12.9>
+nvidia-cuda-nvrtc
 transformers==4.55.0
 prometheus_client
 prometheus_fastapi_instrumentator
@@ -64,6 +70,6 @@ ninja
 etcd3
 blake3
 soundfile
-triton==3.3.1; platform_machine == "x86_64"
+triton>=3.3.1,<=3.4.0; platform_machine == "x86_64"
 tiktoken
 blobfile
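After the CU12 toggle (the sed shown earlier, or `modify_requirements_for_cuda` in the next hunk) every marker pair flips; the affected lines then read as follows, with pip ignoring the trailing `# <For CUDA 12.9>` comments:

cuda-python>=12,<13 # <For CUDA 12.9>
# cuda-python>=12
nvidia-ml-py>=12,<13 # <For CUDA 12.9>
# nvidia-ml-py>=12
tensorrt>=10.11.0,<=10.13.0 # <For CUDA 12.9>
# tensorrt~=10.13.0
torch>=2.7.1,<=2.8.0a0 # <For CUDA 12.9>
# torch>=2.8.0a0,<=2.8.0
nvidia-cuda-nvrtc-cu12 # <For CUDA 12.9>
# nvidia-cuda-nvrtc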
@@ -865,6 +865,42 @@ def main(*,
     # and validating python changes in the whl.
     clear_folder(dist_dir)
 
+    # Modify requirements.txt for wheel build based on CUDA version
+    def modify_requirements_for_cuda():
+        requirements_file = project_dir / ("requirements-windows.txt"
+                                           if on_windows else
+                                           "requirements.txt")
+        if os.environ.get("CUDA_VERSION", "").startswith("12."):
+            print(
+                "Detected CUDA 12 environment, modifying requirements.txt for wheel build..."
+            )
+            with open(requirements_file, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+            modified_lines = []
+            i = 0
+            while i < len(lines):
+                line = lines[i]
+                if "<For CUDA 12.9>" in line and line.strip().startswith(
+                        "#"):
+                    new_line = line.replace("# ", "", 1)
+                    print(
+                        f"Enable CUDA 12.9 dependency: {new_line.strip()}")
+                    modified_lines.append(new_line)
+                    print(
+                        f"Disable CUDA 13 dependency: # {lines[i + 1].strip()}"
+                    )
+                    modified_lines.append("# " + lines[i + 1])
+                    i += 1
+                else:
+                    modified_lines.append(line)
+                i += 1
+            with open(requirements_file, 'w', encoding='utf-8') as f:
+                f.writelines(modified_lines)
+            return True
+        return False
+
+    modify_requirements_for_cuda()
+
     build_run(
         f'\"{venv_python}\" -m build {project_dir} --skip-dependency-check --no-isolation --wheel --outdir "{dist_dir}"'
     )
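The Python path mirrors the Jenkins sed and keys off the CUDA_VERSION environment variable; a hedged invocation example (the scripts/build_wheel.py location is assumed, not shown in this diff):

# Default (CUDA 13) build: requirements.txt is left untouched.
python3 scripts/build_wheel.py

# CUDA 12.x build: the "<For CUDA 12.9>" pairs are flipped before packaging.
CUDA_VERSION=12.9 python3 scripts/build_wheel.py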