From 5ca3376d6f2aa1d865d200003406869fbb3875ed Mon Sep 17 00:00:00 2001 From: Zhanrui Sun Date: Fri, 5 Sep 2025 00:04:22 -0700 Subject: [PATCH] Support DLFW sanity check use CU13 image Signed-off-by: Zhanrui Sun --- jenkins/Build.groovy | 12 ++++++++---- jenkins/L0_Test.groovy | 37 ++++++++++++++++++++++++++++--------- requirements.txt | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index 1a0971099e..34be217550 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -34,6 +34,9 @@ def TARNAME = "tarName" @Field def WHEEL_ARCHS = "wheelArchs" +@Field +def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig" + @Field def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla" @@ -109,6 +112,7 @@ def BUILD_CONFIGS = [ (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD", (TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz", (WHEEL_ARCHS): "90-real;100-real;120-real", + (BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA ], ] @@ -457,8 +461,10 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64) trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312") } + def buildJobs = buildFlags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS + withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) { - sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${BUILD_JOBS} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks" + sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${buildJobs} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks" } if (is_linux_x86_64) { sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py" @@ -472,7 +478,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64) if (tarName.contains("CU12")) { tritonShortTag = "r25.06" } - sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install" + sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install" // Step 3: packaging wheels into tarfile sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/" @@ -579,8 +585,6 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars) "Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild( pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE), ] - } else { - buildConfigs.remove("Build TRT-LLM LLVM") // TODO: Remove after LLVM is supported on AArch64 } rtServer ( diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index aa972b2afa..deae86ae08 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -904,7 +904,8 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod - key: "kubernetes.io/hostname" operator: In values: - - "lego-cg1-qct-066.ipp3a2.colossus\"""" + - "lego-cg1-qct-066.ipp3a2.colossus" + - "lego-cg1-qct-069.ipp3a2.colossus\"""" } def podConfig = [ @@ -2160,13 +2161,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) // Python version and OS for sanity check x86SanityCheckConfigs = [ - "PY312-DLFW-CU12": [ - LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9, - "B200_PCIe", + "PY312-DLFW": [ + LLM_DOCKER_IMAGE, + "A10", X86_64_TRIPLE, - true, + false, "dlfw/", - DLFW_IMAGE_12_9, + DLFW_IMAGE, false, ], "PY310-UB2204-CU12": [ @@ -2199,13 +2200,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) UBUNTU_24_04_IMAGE, true, // Extra PyTorch CUDA 12.8 install ], - "PY312-DLFW-CU12": [ - LLM_SBSA_DOCKER_IMAGE_12_9, + "PY312-DLFW": [ + LLM_DOCKER_IMAGE, "GH200", AARCH64_TRIPLE, false, "dlfw/", - DLFW_IMAGE_12_9, + DLFW_IMAGE, false, ], ] @@ -2321,6 +2322,24 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) } } + // TODO: Remove this after public triton supports CUDA 13. + if (key == "PY312-DLFW" && values[2] == X86_64_TRIPLE) { + trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install https://download.pytorch.org/whl/nightly/pytorch_triton-3.3.1%2Bgitc8757738-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl") + sh """ + cd /usr/local/lib/python3.12/dist-packages/ && \ + ls -la | grep pytorch_triton && \ + mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \ + cd triton-3.3.1+gitc8757738.dist-info && \ + echo "Current directory: \$(pwd)" && \ + echo "Files in directory:" && \ + ls -la && \ + sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \ + sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \ + echo "METADATA after update:" && \ + grep "^Name:" METADATA + """ + } + def libEnv = [] if (env.alternativeTRT) { stage("Replace TensorRT") { diff --git a/requirements.txt b/requirements.txt index 0a574d5333..e59d5abd09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -70,7 +70,7 @@ ninja etcd3 blake3 soundfile -triton>=3.3.1,<3.4.0; platform_machine == "x86_64" +triton==3.3.1; platform_machine == "x86_64" tiktoken blobfile openai-harmony==0.0.4