From 9d65b8bf24ec806f71ea23d644522521633d1994 Mon Sep 17 00:00:00 2001 From: Emma Qiao Date: Sat, 24 Jan 2026 14:00:17 +0800 Subject: [PATCH] [None][infra] Fix TRT-LLM data scratch mount point for gb10x (#10880) Signed-off-by: qqiao Signed-off-by: Emma Qiao Co-authored-by: Yanchao Lu --- jenkins/L0_Test.groovy | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 92636e7a1b..07597c5457 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -702,6 +702,13 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG, dockerArgs += " --device=/dev/gdrdrv:/dev/gdrdrv" } } + if (fileExists('/home/scratch.trt_llm_data_ci')) { + dockerArgs += " -v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " + } else if (fileExists('/home/scratch.trt_llm_data')) { + dockerArgs += " -v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro " + } else { + echo "Existing TRT-LLM data scratch mount points cannot be set up in this cluster, ignore..." + } } dockerArgs = "${dockerArgs} " + @@ -710,7 +717,6 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG, "--entrypoint=\"\" " + "--security-opt seccomp=unconfined " + "-u root:root " + - "-v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " + "-v /tmp/ccache:${CCACHE_DIR}:rw " + "-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " + "--cap-add=SYSLOG" @@ -1855,7 +1861,10 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod server: 10.117.145.14 path: /vol/scratch1/scratch.michaeln_blossom """ - if (type.contains("6000d") || type.contains("gh200")) { + + // Austin FlexCache looks slow and unstable recently. Remove gh200 temporarily. + // That means gh200 nodes will use the default Blossom data scratch. + if (type.contains("6000d")) { // rtx-pro-6000d and gh200 nodes are located in Austin DC, we use the FlexCache to speed up the data access. llmModelVolume = """ - name: scratch-trt-llm-data