Support using the CU13 image for the DLFW sanity check

Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
This commit is contained in:
Zhanrui Sun 2025-09-05 00:04:22 -07:00
parent 1978227bb7
commit 5ca3376d6f
3 changed files with 37 additions and 14 deletions

View File

@ -34,6 +34,9 @@ def TARNAME = "tarName"
@Field
def WHEEL_ARCHS = "wheelArchs"
@Field
def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
@Field
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
@ -109,6 +112,7 @@ def BUILD_CONFIGS = [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
],
]
@ -457,8 +461,10 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312")
}
def buildJobs = buildFlags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS
withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${BUILD_JOBS} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${buildJobs} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
}
if (is_linux_x86_64) {
sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py"
@ -472,7 +478,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
if (tarName.contains("CU12")) {
tritonShortTag = "r25.06"
}
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
// Step 3: packaging wheels into tarfile
sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
@ -579,8 +585,6 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
"Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
]
} else {
buildConfigs.remove("Build TRT-LLM LLVM") // TODO: Remove after LLVM is supported on AArch64
}
rtServer (

View File

@ -904,7 +904,8 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
- key: "kubernetes.io/hostname"
operator: In
values:
- "lego-cg1-qct-066.ipp3a2.colossus\""""
- "lego-cg1-qct-066.ipp3a2.colossus"
- "lego-cg1-qct-069.ipp3a2.colossus\""""
}
def podConfig = [
@ -2160,13 +2161,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
// Python version and OS for sanity check
x86SanityCheckConfigs = [
"PY312-DLFW-CU12": [
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9,
"B200_PCIe",
"PY312-DLFW": [
LLM_DOCKER_IMAGE,
"A10",
X86_64_TRIPLE,
true,
false,
"dlfw/",
DLFW_IMAGE_12_9,
DLFW_IMAGE,
false,
],
"PY310-UB2204-CU12": [
@ -2199,13 +2200,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
UBUNTU_24_04_IMAGE,
true, // Extra PyTorch CUDA 12.8 install
],
"PY312-DLFW-CU12": [
LLM_SBSA_DOCKER_IMAGE_12_9,
"PY312-DLFW": [
LLM_DOCKER_IMAGE,
"GH200",
AARCH64_TRIPLE,
false,
"dlfw/",
DLFW_IMAGE_12_9,
DLFW_IMAGE,
false,
],
]
@ -2321,6 +2322,24 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
}
}
// TODO: Remove this workaround once the public Triton release supports CUDA 13.
if (key == "PY312-DLFW" && values[2] == X86_64_TRIPLE) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install https://download.pytorch.org/whl/nightly/pytorch_triton-3.3.1%2Bgitc8757738-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl")
sh """
cd /usr/local/lib/python3.12/dist-packages/ && \
ls -la | grep pytorch_triton && \
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
cd triton-3.3.1+gitc8757738.dist-info && \
echo "Current directory: \$(pwd)" && \
echo "Files in directory:" && \
ls -la && \
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
echo "METADATA after update:" && \
grep "^Name:" METADATA
"""
}
def libEnv = []
if (env.alternativeTRT) {
stage("Replace TensorRT") {

View File

@ -70,7 +70,7 @@ ninja
etcd3
blake3
soundfile
triton>=3.3.1,<3.4.0; platform_machine == "x86_64"
triton==3.3.1; platform_machine == "x86_64"
tiktoken
blobfile
openai-harmony==0.0.4