mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-23 12:12:39 +08:00
Support DLFW sanity check using the CU13 image
Signed-off-by: Zhanrui Sun <zhanruis@nvidia.com>
This commit is contained in:
parent
1978227bb7
commit
5ca3376d6f
@ -34,6 +34,9 @@ def TARNAME = "tarName"
|
||||
@Field
|
||||
def WHEEL_ARCHS = "wheelArchs"
|
||||
|
||||
@Field
|
||||
def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
|
||||
|
||||
@Field
|
||||
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
|
||||
|
||||
@ -109,6 +112,7 @@ def BUILD_CONFIGS = [
|
||||
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
|
||||
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
|
||||
(WHEEL_ARCHS): "90-real;100-real;120-real",
|
||||
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fixing the build OOM issue on SBSA
|
||||
],
|
||||
]
|
||||
|
||||
@ -457,8 +461,10 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
|
||||
trtllm_utils.replaceWithAlternativeTRT(env.alternativeTRT, "cp312")
|
||||
}
|
||||
|
||||
def buildJobs = buildFlags[BUILD_JOBS_FOR_CONFIG] ?: BUILD_JOBS
|
||||
|
||||
withCredentials([usernamePassword(credentialsId: "urm-artifactory-creds", usernameVariable: 'CONAN_LOGIN_USERNAME', passwordVariable: 'CONAN_PASSWORD')]) {
|
||||
sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${BUILD_JOBS} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
|
||||
sh "cd ${LLM_ROOT} && python3 scripts/build_wheel.py --use_ccache -G Ninja -j ${buildJobs} -a '${buildFlags[WHEEL_ARCHS]}' ${buildFlags[WHEEL_EXTRA_ARGS]} --benchmarks"
|
||||
}
|
||||
if (is_linux_x86_64) {
|
||||
sh "cd ${LLM_ROOT} && python3 scripts/build_cpp_examples.py"
|
||||
@ -472,7 +478,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
|
||||
if (tarName.contains("CU12")) {
|
||||
tritonShortTag = "r25.06"
|
||||
}
|
||||
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
|
||||
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
|
||||
|
||||
// Step 3: packaging wheels into tarfile
|
||||
sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
|
||||
@ -579,8 +585,6 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
|
||||
"Build TRT-LLM SingleDevice": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
|
||||
pipeline, CONFIG_LINUX_X86_64_SINGLE_DEVICE),
|
||||
]
|
||||
} else {
|
||||
buildConfigs.remove("Build TRT-LLM LLVM") // TODO: Remove after LLVM is supported on AArch64
|
||||
}
|
||||
|
||||
rtServer (
|
||||
|
||||
@ -904,7 +904,8 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
|
||||
- key: "kubernetes.io/hostname"
|
||||
operator: In
|
||||
values:
|
||||
- "lego-cg1-qct-066.ipp3a2.colossus\""""
|
||||
- "lego-cg1-qct-066.ipp3a2.colossus"
|
||||
- "lego-cg1-qct-069.ipp3a2.colossus\""""
|
||||
}
|
||||
|
||||
def podConfig = [
|
||||
@ -2160,13 +2161,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
|
||||
// Python version and OS for sanity check
|
||||
x86SanityCheckConfigs = [
|
||||
"PY312-DLFW-CU12": [
|
||||
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE_12_9,
|
||||
"B200_PCIe",
|
||||
"PY312-DLFW": [
|
||||
LLM_DOCKER_IMAGE,
|
||||
"A10",
|
||||
X86_64_TRIPLE,
|
||||
true,
|
||||
false,
|
||||
"dlfw/",
|
||||
DLFW_IMAGE_12_9,
|
||||
DLFW_IMAGE,
|
||||
false,
|
||||
],
|
||||
"PY310-UB2204-CU12": [
|
||||
@ -2199,13 +2200,13 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
UBUNTU_24_04_IMAGE,
|
||||
true, // Extra PyTorch CUDA 12.8 install
|
||||
],
|
||||
"PY312-DLFW-CU12": [
|
||||
LLM_SBSA_DOCKER_IMAGE_12_9,
|
||||
"PY312-DLFW": [
|
||||
LLM_DOCKER_IMAGE,
|
||||
"GH200",
|
||||
AARCH64_TRIPLE,
|
||||
false,
|
||||
"dlfw/",
|
||||
DLFW_IMAGE_12_9,
|
||||
DLFW_IMAGE,
|
||||
false,
|
||||
],
|
||||
]
|
||||
@ -2321,6 +2322,24 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Remove this after public triton supports CUDA 13.
|
||||
if (key == "PY312-DLFW" && values[2] == X86_64_TRIPLE) {
|
||||
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install https://download.pytorch.org/whl/nightly/pytorch_triton-3.3.1%2Bgitc8757738-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl")
|
||||
sh """
|
||||
cd /usr/local/lib/python3.12/dist-packages/ && \
|
||||
ls -la | grep pytorch_triton && \
|
||||
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
|
||||
cd triton-3.3.1+gitc8757738.dist-info && \
|
||||
echo "Current directory: \$(pwd)" && \
|
||||
echo "Files in directory:" && \
|
||||
ls -la && \
|
||||
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
|
||||
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
|
||||
echo "METADATA after update:" && \
|
||||
grep "^Name:" METADATA
|
||||
"""
|
||||
}
|
||||
|
||||
def libEnv = []
|
||||
if (env.alternativeTRT) {
|
||||
stage("Replace TensorRT") {
|
||||
|
||||
@ -70,7 +70,7 @@ ninja
|
||||
etcd3
|
||||
blake3
|
||||
soundfile
|
||||
triton>=3.3.1,<3.4.0; platform_machine == "x86_64"
|
||||
triton==3.3.1; platform_machine == "x86_64"
|
||||
tiktoken
|
||||
blobfile
|
||||
openai-harmony==0.0.4
|
||||
|
||||
Loading…
Reference in New Issue
Block a user