[None][infra] Fix TRT-LLM data scratch mount point for gb10x (#10880)

Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: Emma Qiao <qqiao@nvidia.com> Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
2026-02-05 02:31:33 +08:00 · 2026-01-24 14:00:17 +08:00 · 2026-01-24 14:00:17 +08:00 · 9d65b8bf24
commit 9d65b8bf24
parent 78a008d61a
1 changed files with 11 additions and 2 deletions
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -702,6 +702,13 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
                            dockerArgs += " --device=/dev/gdrdrv:/dev/gdrdrv"
                        }
                    }
+                    if (fileExists('/home/scratch.trt_llm_data_ci')) {
+                        dockerArgs += " -v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro "
+                    } else if (fileExists('/home/scratch.trt_llm_data')) {
+                        dockerArgs += " -v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro "
+                    } else {
+                        echo "Existing TRT-LLM data scratch mount points cannot be set up in this cluster, ignore..."
+                    }
                }

                dockerArgs = "${dockerArgs} " +
@@ -710,7 +717,6 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
                    "--entrypoint=\"\" " +
                    "--security-opt seccomp=unconfined " +
                    "-u root:root " +
-                    "-v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " +
                    "-v /tmp/ccache:${CCACHE_DIR}:rw " +
                    "-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
                    "--cap-add=SYSLOG"
@@ -1855,7 +1861,10 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
                    server: 10.117.145.14
                    path: /vol/scratch1/scratch.michaeln_blossom
    """
-    if (type.contains("6000d") || type.contains("gh200")) {
+
+    // The Austin FlexCache has been slow and unstable recently, so gh200 is temporarily excluded from this check.
+    // As a result, gh200 nodes fall back to the default Blossom data scratch.
+    if (type.contains("6000d")) {
        // rtx-pro-6000d and gh200 nodes are located in the Austin DC, where the FlexCache speeds up data access; only rtx-pro-6000d uses it while gh200 is temporarily excluded.
        llmModelVolume = """
                - name: scratch-trt-llm-data