mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-05 02:31:33 +08:00
[None][infra] Fix TRT-LLM data scratch mount point for gb10x (#10880)
Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: Emma Qiao <qqiao@nvidia.com> Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
This commit is contained in:
parent
78a008d61a
commit
9d65b8bf24
@ -702,6 +702,13 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
|
||||
dockerArgs += " --device=/dev/gdrdrv:/dev/gdrdrv"
|
||||
}
|
||||
}
|
||||
if (fileExists('/home/scratch.trt_llm_data_ci')) {
|
||||
dockerArgs += " -v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro "
|
||||
} else if (fileExists('/home/scratch.trt_llm_data')) {
|
||||
dockerArgs += " -v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro "
|
||||
} else {
|
||||
echo "Existing TRT-LLM data scratch mount points cannot be set up in this cluster, ignore..."
|
||||
}
|
||||
}
|
||||
|
||||
dockerArgs = "${dockerArgs} " +
|
||||
@ -710,7 +717,6 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
|
||||
"--entrypoint=\"\" " +
|
||||
"--security-opt seccomp=unconfined " +
|
||||
"-u root:root " +
|
||||
"-v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " +
|
||||
"-v /tmp/ccache:${CCACHE_DIR}:rw " +
|
||||
"-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
|
||||
"--cap-add=SYSLOG"
|
||||
@ -1855,7 +1861,10 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
|
||||
server: 10.117.145.14
|
||||
path: /vol/scratch1/scratch.michaeln_blossom
|
||||
"""
|
||||
if (type.contains("6000d") || type.contains("gh200")) {
|
||||
|
||||
// The Austin FlexCache has been slow and unstable recently, so gh200 is temporarily excluded from using it.
|
||||
// That means gh200 nodes will use the default Blossom data scratch.
|
||||
if (type.contains("6000d")) {
|
||||
// rtx-pro-6000d and gh200 nodes are located in the Austin DC; we use the FlexCache to speed up data access.
|
||||
llmModelVolume = """
|
||||
- name: scratch-trt-llm-data
|
||||
|
||||
Loading…
Reference in New Issue
Block a user