mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[Infra] - Always use x86 image for the Jenkins agent and a few clean-ups (#5753)
Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
This commit is contained in:
parent
6bddaf6df6
commit
d95ae1378b
@ -16,7 +16,8 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
|
||||
|
||||
LLM_DOCKER_IMAGE = env.dockerImage
|
||||
|
||||
AGENT_IMAGE = env.dockerImage
|
||||
// Always use x86_64 image for agent
|
||||
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
|
||||
|
||||
POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
|
||||
POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"
|
||||
|
||||
@ -44,12 +44,6 @@ def getContainerURIs()
|
||||
return uris
|
||||
}
|
||||
|
||||
// TODO: Move common variables to a unified location
|
||||
BUILD_CORES_REQUEST = "8"
|
||||
BUILD_CORES_LIMIT = "8"
|
||||
BUILD_MEMORY_REQUEST = "48Gi"
|
||||
BUILD_MEMORY_LIMIT = "48Gi"
|
||||
|
||||
// Stage choices
|
||||
STAGE_CHOICE_NORMAL = "normal"
|
||||
STAGE_CHOICE_SKIP = "skip"
|
||||
@ -214,37 +208,15 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
|
||||
resources:
|
||||
requests:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
limits:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
imagePullPolicy: Always"""
|
||||
nodeLabelPrefix = "cpu"
|
||||
break
|
||||
case "build":
|
||||
containerConfig = """
|
||||
- name: trt-llm
|
||||
image: ${image}
|
||||
command: ['cat']
|
||||
volumeMounts:
|
||||
- name: sw-tensorrt-pvc
|
||||
mountPath: "/mnt/sw-tensorrt-pvc"
|
||||
readOnly: false
|
||||
tty: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: ${BUILD_CORES_REQUEST}
|
||||
memory: ${BUILD_MEMORY_REQUEST}
|
||||
ephemeral-storage: 200Gi
|
||||
limits:
|
||||
cpu: ${BUILD_CORES_LIMIT}
|
||||
memory: ${BUILD_MEMORY_LIMIT}
|
||||
ephemeral-storage: 200Gi
|
||||
imagePullPolicy: Always"""
|
||||
nodeLabelPrefix = "cpu"
|
||||
break
|
||||
case "package":
|
||||
containerConfig = """
|
||||
- name: trt-llm
|
||||
@ -254,11 +226,11 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
|
||||
resources:
|
||||
requests:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
limits:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
imagePullPolicy: Always"""
|
||||
nodeLabelPrefix = "cpu"
|
||||
@ -299,11 +271,11 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
|
||||
resources:
|
||||
requests:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
limits:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
qosClass: Guaranteed
|
||||
volumes:
|
||||
@ -327,7 +299,7 @@ def echoNodeAndGpuInfo(pipeline, stageName)
|
||||
def setupPipelineEnvironment(pipeline, testFilter, globalVars)
|
||||
{
|
||||
image = "urm.nvidia.com/docker/golang:1.22"
|
||||
setupPipelineSpec = createKubernetesPodConfig(image, "build")
|
||||
setupPipelineSpec = createKubernetesPodConfig(image, "package")
|
||||
trtllm_utils.launchKubernetesPod(pipeline, setupPipelineSpec, "trt-llm", {
|
||||
sh "env | sort"
|
||||
updateGitlabCommitStatus name: "${BUILD_STATUS_NAME}", state: 'running'
|
||||
@ -413,7 +385,7 @@ def launchReleaseCheck(pipeline)
|
||||
|
||||
def image = "urm.nvidia.com/docker/golang:1.22"
|
||||
stageName = "Release Check"
|
||||
trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(image, "build"), "trt-llm", {
|
||||
trtllm_utils.launchKubernetesPod(pipeline, createKubernetesPodConfig(image, "package"), "trt-llm", {
|
||||
stage("[${stageName}] Run") {
|
||||
if (RELESE_CHECK_CHOICE == STAGE_CHOICE_SKIP) {
|
||||
echo "Release Check job is skipped due to Jenkins configuration"
|
||||
|
||||
@ -34,11 +34,11 @@ def createKubernetesPodConfig(image, arch = "amd64")
|
||||
resources:
|
||||
requests:
|
||||
cpu: 2
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
limits:
|
||||
cpu: 2
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
imagePullPolicy: Always
|
||||
- name: jnlp
|
||||
@ -47,11 +47,11 @@ def createKubernetesPodConfig(image, arch = "amd64")
|
||||
resources:
|
||||
requests:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
limits:
|
||||
cpu: '2'
|
||||
memory: 10Gi
|
||||
memory: 5Gi
|
||||
ephemeral-storage: 25Gi
|
||||
qosClass: Guaranteed
|
||||
volumes:
|
||||
@ -119,10 +119,10 @@ pipeline {
|
||||
case "Reset":
|
||||
sh "rm -rf ${CCACHE_DIR}"
|
||||
sh "mkdir -p ${CCACHE_DIR}"
|
||||
sh "printf \"max_size=300G\ntemporary_dir=/tmp/ccache\ncompression = true\n\" > ${CCACHE_DIR}/ccache.conf"
|
||||
sh "printf 'max_size=500G\ntemporary_dir=/tmp/ccache\ncompression=true\nbase_dir=/home/jenkins/agent/workspace/LLM\nsloppiness=file_macro,time_macros,pch_defines\n' > ${CCACHE_DIR}/ccache.conf"
|
||||
break
|
||||
case "Config":
|
||||
sh "printf \"max_size=300G\ntemporary_dir=/tmp/ccache\ncompression = true\n\" > ${CCACHE_DIR}/ccache.conf"
|
||||
sh "printf 'max_size=500G\ntemporary_dir=/tmp/ccache\ncompression=true\nbase_dir=/home/jenkins/agent/workspace/LLM\nsloppiness=file_macro,time_macros,pch_defines\n' > ${CCACHE_DIR}/ccache.conf"
|
||||
break
|
||||
case "Stats":
|
||||
sh "ccache -sv"
|
||||
|
||||
@ -13,9 +13,9 @@ onnx_graphsurgeon>=0.5.2
|
||||
openai
|
||||
polygraphy
|
||||
psutil
|
||||
nvidia-ml-py>=12
|
||||
nvidia-ml-py>=12,<13
|
||||
# Just a wrapper since nvidia-modelopt requires pynvml
|
||||
pynvml>=12.0.0
|
||||
pynvml==12.0.0
|
||||
pulp
|
||||
pandas
|
||||
h5py==3.12.1
|
||||
|
||||
Loading…
Reference in New Issue
Block a user