From df845a028b84d2d1ff02ffcaf2631d0007f4f79a Mon Sep 17 00:00:00 2001 From: Zhanrui Sun <184402041+ZhanruiSunCh@users.noreply.github.com> Date: Mon, 19 Jan 2026 13:40:40 +0800 Subject: [PATCH] [TRTLLM-9581][infra] Use /home/scratch.trt_llm_data_ci in computelab (#10616) Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com> Signed-off-by: Zhanrui Sun <184402041+ZhanruiSunCh@users.noreply.github.com> --- .../docker-compose.override-example.yml | 2 +- examples/llm-api/llm_sparse_attention.py | 2 +- examples/models/contrib/bloom/README.md | 4 ++-- examples/models/core/bert/utils.py | 4 ++-- jenkins/L0_Test.groovy | 4 ++-- tests/integration/defs/conftest.py | 2 +- tests/microbenchmarks/README.md | 2 +- tests/scripts/perf-sanity/benchmark-serve.sh | 4 ++-- .../perf-sanity/run_benchmark_serve.py | 2 +- .../unittest/others/test_llama_conversion.sh | 22 +++++++++---------- 10 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.devcontainer/docker-compose.override-example.yml b/.devcontainer/docker-compose.override-example.yml index 67f4f3d0c4..a0f46a4324 100644 --- a/.devcontainer/docker-compose.override-example.yml +++ b/.devcontainer/docker-compose.override-example.yml @@ -5,4 +5,4 @@ services: volumes: # Uncomment the following lines to enable # # Mount TRTLLM data volume: - # - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro + # - /home/scratch.trt_llm_data_ci/:/home/scratch.trt_llm_data_ci/:ro diff --git a/examples/llm-api/llm_sparse_attention.py b/examples/llm-api/llm_sparse_attention.py index ce052c3367..df98e3b5c4 100644 --- a/examples/llm-api/llm_sparse_attention.py +++ b/examples/llm-api/llm_sparse_attention.py @@ -37,7 +37,7 @@ def parse_arguments(): '--model_path', type=str, default= - "/home/scratch.trt_llm_data/llm-models/llama-3.1-model/Llama-3.1-8B-Instruct" + "/home/scratch.trt_llm_data_ci/llm-models/llama-3.1-model/Llama-3.1-8B-Instruct" ) parser.add_argument( '--input_file', diff --git a/examples/models/contrib/bloom/README.md b/examples/models/contrib/bloom/README.md index e0ab0ad655..4542d6147f 100644 --- a/examples/models/contrib/bloom/README.md +++ b/examples/models/contrib/bloom/README.md @@ -189,7 +189,7 @@ Note we use `--bin_model_dir` instead of `--model_dir` since SmoothQuant model n ``` # Quantize HF Bloom 3B into FP8 and export trtllm checkpoint -python ../../../quantization/quantize.py --model_dir /home/scratch.trt_llm_data/llm-models/bloom-3b \ +python ../../../quantization/quantize.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/bloom-3b \ --dtype float16 \ --qformat fp8 \ --kv_cache_dtype fp8 \ @@ -230,7 +230,7 @@ mpirun -n 8 --allow-run-as-root \ --engine_dir ./bloom/176B/trt_engines/fp16/8-gpu/ python ../../../summarize.py --test_trt_llm \ - --hf_model_dir /home/scratch.trt_llm_data/llm-models/bloom-3b \ + --hf_model_dir /home/scratch.trt_llm_data_ci/llm-models/bloom-3b \ --data_type fp16 \ --engine_dir /tmp/bloom/3b/trt_engines/fp8/1-gpu/ ``` diff --git a/examples/models/core/bert/utils.py b/examples/models/core/bert/utils.py index 82bf5382aa..b3714e523e 100644 --- a/examples/models/core/bert/utils.py +++ b/examples/models/core/bert/utils.py @@ -18,7 +18,7 @@ from transformers import RobertaConfig, RobertaPreTrainedModel, RobertaForQuesti # NOTE: This routine is copied from from tests/unittests/utils/llm_data.py def llm_models_root(check=False) -> Optional[Path]: - root = Path("/home/scratch.trt_llm_data/llm-models/") + root = Path("/home/scratch.trt_llm_data_ci/llm-models/") if "LLM_MODELS_ROOT" in 
os.environ: root = Path(os.environ.get("LLM_MODELS_ROOT")) @@ -28,7 +28,7 @@ def llm_models_root(check=False) -> Optional[Path]: if check: assert root.exists(), \ - "You shall set LLM_MODELS_ROOT env or be able to access /home/scratch.trt_llm_data to run this test" + "You shall set LLM_MODELS_ROOT env or be able to access /home/scratch.trt_llm_data_ci to run this test" return root if root.exists() else None diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 606463d160..92636e7a1b 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -710,7 +710,7 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG, "--entrypoint=\"\" " + "--security-opt seccomp=unconfined " + "-u root:root " + - "-v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro " + + "-v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " + "-v /tmp/ccache:${CCACHE_DIR}:rw " + "-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " + "--cap-add=SYSLOG" @@ -892,7 +892,7 @@ def getMountListForSlurmTest(SlurmCluster cluster, boolean useSbatch = false) // data/cache mounts if (cluster.containerRuntime.toString() == "DOCKER") { mounts += [ - "/home/scratch.trt_llm_data:/scratch.trt_llm_data:ro", + "/home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro", ] } else if (cluster.containerRuntime.toString() == "ENROOT") { if (!cluster.scratchPath) { diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py index c06b0d18bc..411c43179d 100644 --- a/tests/integration/defs/conftest.py +++ b/tests/integration/defs/conftest.py @@ -83,7 +83,7 @@ def wget(url, out): def llm_models_root() -> str: """Return LLM_MODELS_ROOT path if it is set in env, assert when it's set but not a valid path.""" - root = Path("/home/scratch.trt_llm_data/llm-models/") + root = Path("/home/scratch.trt_llm_data_ci/llm-models/") if "LLM_MODELS_ROOT" in os.environ: root = Path(os.environ.get("LLM_MODELS_ROOT")) diff --git a/tests/microbenchmarks/README.md b/tests/microbenchmarks/README.md index 48bce4ec89..884e02004a 100644 --- a/tests/microbenchmarks/README.md +++ b/tests/microbenchmarks/README.md @@ -16,7 +16,7 @@ python ./build_time_benchmark.py --model "TinyLlama/TinyLlama_v1.1" # no weights python ./build_time_benchmark.py --model "openai-community/gpt2" --load # with weights loading # example 3: benchmark a local download HF model -python ./build_time_benchmark.py --model /home/scratch.trt_llm_data/llm-models/falcon-rw-1b/ +python ./build_time_benchmark.py --model /home/scratch.trt_llm_data_ci/llm-models/falcon-rw-1b/ # example 4: benchmark one model with managed weights option, with verbose option python ./build_time_benchmark.py --model llama2-70b.TP4 --managed_weights -v diff --git a/tests/scripts/perf-sanity/benchmark-serve.sh b/tests/scripts/perf-sanity/benchmark-serve.sh index 4621f571e8..3d2d142849 100755 --- a/tests/scripts/perf-sanity/benchmark-serve.sh +++ b/tests/scripts/perf-sanity/benchmark-serve.sh @@ -44,7 +44,7 @@ report_head() { run_benchmark_and_parse() { # Run benchmark and parse results in a single Docker container - mount=" -v /home/scratch.trt_llm_data:/home/scratch.trt_llm_data:ro -v $output_dir:$output_dir:rw -v $bench_dir:$bench_dir:ro" + mount=" -v /home/scratch.trt_llm_data_ci:/home/scratch.trt_llm_data_ci:ro -v $output_dir:$output_dir:rw -v $bench_dir:$bench_dir:ro" if [[ -n "$trtllm_dir" && -d "$trtllm_dir" ]]; then mount="$mount -v $trtllm_dir:$trtllm_dir:ro" fi @@ -56,7 +56,7 @@ run_benchmark_and_parse() { ${IMAGE} \ bash -c " echo 'Running 
benchmarks...' - export LLM_MODELS_ROOT=/home/scratch.trt_llm_data/llm-models + export LLM_MODELS_ROOT=/home/scratch.trt_llm_data_ci/llm-models # Handle trtllm_dir parameter if [[ -n \"$trtllm_dir\" && -d \"$trtllm_dir\" ]]; then diff --git a/tests/scripts/perf-sanity/run_benchmark_serve.py b/tests/scripts/perf-sanity/run_benchmark_serve.py index 627b5d980d..8636c815d0 100644 --- a/tests/scripts/perf-sanity/run_benchmark_serve.py +++ b/tests/scripts/perf-sanity/run_benchmark_serve.py @@ -166,7 +166,7 @@ HF_MODEL_PATH = { } LLM_MODELS_ROOT = os.environ.get('LLM_MODELS_ROOT', - '/home/scratch.trt_llm_data/llm-models') + '/home/scratch.trt_llm_data_ci/llm-models') # Model path mapping diff --git a/tests/unittest/others/test_llama_conversion.sh b/tests/unittest/others/test_llama_conversion.sh index 49140fc9b2..ed5883e786 100755 --- a/tests/unittest/others/test_llama_conversion.sh +++ b/tests/unittest/others/test_llama_conversion.sh @@ -1,7 +1,7 @@ set -ex export PATH=~/.local/bin/:$PATH # trtllm-build is inside ~/.local/bin -export MODEL=/home/scratch.trt_llm_data/llm-models/llama-models/llama-7b-hf/ +export MODEL=/home/scratch.trt_llm_data_ci/llm-models/llama-models/llama-7b-hf/ test_fake_config() { python3 convert_checkpoint.py --dtype float16 --n_layer 2 --output_dir ./c-model/llama-7b/fp16 @@ -13,12 +13,12 @@ test_fake_config() { } test_meta() { - python convert_checkpoint.py --meta_ckpt_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/7B/ --output_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --tp_size 2 + python convert_checkpoint.py --meta_ckpt_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/7B/ --output_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --tp_size 2 trtllm-build --checkpoint_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --output_dir ./trt_engines/llama-v2-7b-engine-tp2-meta --gemm_plugin float16 mpirun -n 2 --allow-run-as-root \ python ../summarize.py --test_trt_llm \ --tensorrt_llm_rouge1_threshold 18 \ - --hf_model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-7b-hf/ \ + --hf_model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-7b-hf/ \ --data_type fp16 \ --engine_dir ./trt_engines/llama-v2-7b-engine-tp2-meta \ --test_hf @@ -80,7 +80,7 @@ test_gptq() { python convert_checkpoint.py --model_dir ${MODEL} \ --output_dir ./tllm_checkpoint/2gpu_gptq \ --dtype float16 \ - --quant_ckpt_path /home/scratch.trt_llm_data/llm-models/int4-quantized-gptq-awq/llama-7b-4bit-gs128.safetensors \ + --quant_ckpt_path /home/scratch.trt_llm_data_ci/llm-models/int4-quantized-gptq-awq/llama-7b-4bit-gs128.safetensors \ --use_weight_only \ --weight_only_precision int4_gptq \ --per_group \ @@ -100,8 +100,8 @@ test_gptq() { } test_lora() { - lora_dir=/home/scratch.trt_llm_data/llm-models/llama-models-v2/chinese-llama-2-lora-13b - python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-13b-hf \ + lora_dir=/home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/chinese-llama-2-lora-13b + python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-13b-hf \ --output_dir ./tllm_checkpoint/2gpu_lora \ --dtype float16 \ --tp_size 2 @@ -126,7 +126,7 @@ test_lora() { } test_mixtral() { - python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/Mixtral-8x7B-v0.1/ \ + python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/Mixtral-8x7B-v0.1/ \ --output_dir ./tllm_checkpoint/mixtral_2gpu \ --dtype 
float16 \ --pp_size 2 \ @@ -137,7 +137,7 @@ test_mixtral() { } test_long_alpaca_rope_scaling() { - python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/LongAlpaca-7B/ \ + python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/LongAlpaca-7B/ \ --output_dir ./tllm_checkpoint/long_alpaca_tp2 \ --dtype float16 \ --tp_size 2 @@ -152,12 +152,12 @@ test_long_alpaca_rope_scaling() { --max_input_length 32768 \ --input_file ../../tests/integration/test_input_files/pg64317_sanitized.txt \ --engine_dir ./trt_engines/long_alpaca_tp2 \ - --tokenizer_dir /home/scratch.trt_llm_data/llm-models/LongAlpaca-7B/ + --tokenizer_dir /home/scratch.trt_llm_data_ci/llm-models/LongAlpaca-7B/ } test_llava() { python ../llama/convert_checkpoint.py \ - --model_dir /home/scratch.trt_llm_data/llm-models/llava-1.5-7b-hf/ \ + --model_dir /home/scratch.trt_llm_data_ci/llm-models/llava-1.5-7b-hf/ \ --output_dir ./trt_checkpoint/llava-1gpu \ --dtype float16 @@ -172,7 +172,7 @@ test_llava() { } test_bfloat16() { - python convert_checkpoint.py --output_dir ./tllm_checkpoint/llama_v2-summarization/bfloat16/1-gpu --dtype=bfloat16 --tp_size=1 --pp_size=1 --model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-7b-hf + python convert_checkpoint.py --output_dir ./tllm_checkpoint/llama_v2-summarization/bfloat16/1-gpu --dtype=bfloat16 --tp_size=1 --pp_size=1 --model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-7b-hf } test_all()
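
For reviewers, a minimal standalone sketch of the lookup pattern these hunks update, mirroring the `llm_models_root` helpers in `tests/integration/defs/conftest.py` and `examples/models/core/bert/utils.py`. It is illustrative only, not a verbatim copy of either file: the hard-coded default moves to `/home/scratch.trt_llm_data_ci`, and an explicit `LLM_MODELS_ROOT` still wins, so setups without the CI mount keep working unchanged.

```python
import os
from pathlib import Path
from typing import Optional


def llm_models_root(check: bool = False) -> Optional[Path]:
    """Resolve the LLM models root, preferring the LLM_MODELS_ROOT env var."""
    # New CI default introduced by this patch.
    root = Path("/home/scratch.trt_llm_data_ci/llm-models/")
    # An explicit LLM_MODELS_ROOT always overrides the default, so non-CI
    # machines can point anywhere without code changes.
    if "LLM_MODELS_ROOT" in os.environ:
        root = Path(os.environ["LLM_MODELS_ROOT"])
    if check:
        assert root.exists(), (
            "You shall set LLM_MODELS_ROOT env or be able to access "
            "/home/scratch.trt_llm_data_ci to run this test")
    return root if root.exists() else None
```

Note that the Jenkins hunks keep the in-container mount point at `/scratch.trt_llm_data` (only the host path changes), so those jobs presumably rely on `LLM_MODELS_ROOT` rather than this default.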