mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-03 17:52:19 +08:00
[TRTLLM-9581][infra] Use /home/scratch.trt_llm_data_ci in computelab (#10616)
Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com> Signed-off-by: Zhanrui Sun <184402041+ZhanruiSunCh@users.noreply.github.com>
This commit is contained in:
parent
68ab1a47c4
commit
df845a028b
@ -5,4 +5,4 @@ services:
|
||||
volumes:
|
||||
# Uncomment the following lines to enable
|
||||
# # Mount TRTLLM data volume:
|
||||
# - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro
|
||||
# - /home/scratch.trt_llm_data_ci/:/home/scratch.trt_llm_data_ci/:ro
|
||||
|
||||
@ -37,7 +37,7 @@ def parse_arguments():
|
||||
'--model_path',
|
||||
type=str,
|
||||
default=
|
||||
"/home/scratch.trt_llm_data/llm-models/llama-3.1-model/Llama-3.1-8B-Instruct"
|
||||
"/home/scratch.trt_llm_data_ci/llm-models/llama-3.1-model/Llama-3.1-8B-Instruct"
|
||||
)
|
||||
parser.add_argument(
|
||||
'--input_file',
|
||||
|
||||
@ -189,7 +189,7 @@ Note we use `--bin_model_dir` instead of `--model_dir` since SmoothQuant model n
|
||||
|
||||
```
|
||||
# Quantize HF Bloom 3B into FP8 and export trtllm checkpoint
|
||||
python ../../../quantization/quantize.py --model_dir /home/scratch.trt_llm_data/llm-models/bloom-3b \
|
||||
python ../../../quantization/quantize.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/bloom-3b \
|
||||
--dtype float16 \
|
||||
--qformat fp8 \
|
||||
--kv_cache_dtype fp8 \
|
||||
@ -230,7 +230,7 @@ mpirun -n 8 --allow-run-as-root \
|
||||
--engine_dir ./bloom/176B/trt_engines/fp16/8-gpu/
|
||||
|
||||
python ../../../summarize.py --test_trt_llm \
|
||||
--hf_model_dir /home/scratch.trt_llm_data/llm-models/bloom-3b \
|
||||
--hf_model_dir /home/scratch.trt_llm_data_ci/llm-models/bloom-3b \
|
||||
--data_type fp16 \
|
||||
--engine_dir /tmp/bloom/3b/trt_engines/fp8/1-gpu/
|
||||
```
|
||||
|
||||
@ -18,7 +18,7 @@ from transformers import RobertaConfig, RobertaPreTrainedModel, RobertaForQuesti
|
||||
|
||||
# NOTE: This routine is copied from tests/unittests/utils/llm_data.py
|
||||
def llm_models_root(check=False) -> Optional[Path]:
|
||||
root = Path("/home/scratch.trt_llm_data/llm-models/")
|
||||
root = Path("/home/scratch.trt_llm_data_ci/llm-models/")
|
||||
|
||||
if "LLM_MODELS_ROOT" in os.environ:
|
||||
root = Path(os.environ.get("LLM_MODELS_ROOT"))
|
||||
@ -28,7 +28,7 @@ def llm_models_root(check=False) -> Optional[Path]:
|
||||
|
||||
if check:
|
||||
assert root.exists(), \
|
||||
"You shall set LLM_MODELS_ROOT env or be able to access /home/scratch.trt_llm_data to run this test"
|
||||
"You shall set LLM_MODELS_ROOT env or be able to access /home/scratch.trt_llm_data_ci to run this test"
|
||||
|
||||
return root if root.exists() else None
|
||||
|
||||
|
||||
@ -710,7 +710,7 @@ def runLLMTestlistWithAgent(pipeline, platform, testList, config=VANILLA_CONFIG,
|
||||
"--entrypoint=\"\" " +
|
||||
"--security-opt seccomp=unconfined " +
|
||||
"-u root:root " +
|
||||
"-v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro " +
|
||||
"-v /home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro " +
|
||||
"-v /tmp/ccache:${CCACHE_DIR}:rw " +
|
||||
"-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
|
||||
"--cap-add=SYSLOG"
|
||||
@ -892,7 +892,7 @@ def getMountListForSlurmTest(SlurmCluster cluster, boolean useSbatch = false)
|
||||
// data/cache mounts
|
||||
if (cluster.containerRuntime.toString() == "DOCKER") {
|
||||
mounts += [
|
||||
"/home/scratch.trt_llm_data:/scratch.trt_llm_data:ro",
|
||||
"/home/scratch.trt_llm_data_ci:/scratch.trt_llm_data:ro",
|
||||
]
|
||||
} else if (cluster.containerRuntime.toString() == "ENROOT") {
|
||||
if (!cluster.scratchPath) {
|
||||
|
||||
@ -83,7 +83,7 @@ def wget(url, out):
|
||||
def llm_models_root() -> str:
|
||||
"""Return LLM_MODELS_ROOT path if it is set in env, assert when it's set but not a valid path."""
|
||||
|
||||
root = Path("/home/scratch.trt_llm_data/llm-models/")
|
||||
root = Path("/home/scratch.trt_llm_data_ci/llm-models/")
|
||||
if "LLM_MODELS_ROOT" in os.environ:
|
||||
root = Path(os.environ.get("LLM_MODELS_ROOT"))
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ python ./build_time_benchmark.py --model "TinyLlama/TinyLlama_v1.1" # no weights
|
||||
python ./build_time_benchmark.py --model "openai-community/gpt2" --load # with weights loading
|
||||
|
||||
# example 3: benchmark a local download HF model
|
||||
python ./build_time_benchmark.py --model /home/scratch.trt_llm_data/llm-models/falcon-rw-1b/
|
||||
python ./build_time_benchmark.py --model /home/scratch.trt_llm_data_ci/llm-models/falcon-rw-1b/
|
||||
|
||||
# example 4: benchmark one model with managed weights option, with verbose option
|
||||
python ./build_time_benchmark.py --model llama2-70b.TP4 --managed_weights -v
|
||||
|
||||
@ -44,7 +44,7 @@ report_head() {
|
||||
|
||||
run_benchmark_and_parse() {
|
||||
# Run benchmark and parse results in a single Docker container
|
||||
mount=" -v /home/scratch.trt_llm_data:/home/scratch.trt_llm_data:ro -v $output_dir:$output_dir:rw -v $bench_dir:$bench_dir:ro"
|
||||
mount=" -v /home/scratch.trt_llm_data_ci:/home/scratch.trt_llm_data_ci:ro -v $output_dir:$output_dir:rw -v $bench_dir:$bench_dir:ro"
|
||||
if [[ -n "$trtllm_dir" && -d "$trtllm_dir" ]]; then
|
||||
mount="$mount -v $trtllm_dir:$trtllm_dir:ro"
|
||||
fi
|
||||
@ -56,7 +56,7 @@ run_benchmark_and_parse() {
|
||||
${IMAGE} \
|
||||
bash -c "
|
||||
echo 'Running benchmarks...'
|
||||
export LLM_MODELS_ROOT=/home/scratch.trt_llm_data/llm-models
|
||||
export LLM_MODELS_ROOT=/home/scratch.trt_llm_data_ci/llm-models
|
||||
|
||||
# Handle trtllm_dir parameter
|
||||
if [[ -n \"$trtllm_dir\" && -d \"$trtllm_dir\" ]]; then
|
||||
|
||||
@ -166,7 +166,7 @@ HF_MODEL_PATH = {
|
||||
}
|
||||
|
||||
LLM_MODELS_ROOT = os.environ.get('LLM_MODELS_ROOT',
|
||||
'/home/scratch.trt_llm_data/llm-models')
|
||||
'/home/scratch.trt_llm_data_ci/llm-models')
|
||||
|
||||
|
||||
# Model path mapping
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
set -ex
|
||||
|
||||
export PATH=~/.local/bin/:$PATH # trtllm-build is inside ~/.local/bin
|
||||
export MODEL=/home/scratch.trt_llm_data/llm-models/llama-models/llama-7b-hf/
|
||||
export MODEL=/home/scratch.trt_llm_data_ci/llm-models/llama-models/llama-7b-hf/
|
||||
|
||||
test_fake_config() {
|
||||
python3 convert_checkpoint.py --dtype float16 --n_layer 2 --output_dir ./c-model/llama-7b/fp16
|
||||
@ -13,12 +13,12 @@ test_fake_config() {
|
||||
}
|
||||
|
||||
test_meta() {
|
||||
python convert_checkpoint.py --meta_ckpt_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/7B/ --output_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --tp_size 2
|
||||
python convert_checkpoint.py --meta_ckpt_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/7B/ --output_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --tp_size 2
|
||||
trtllm-build --checkpoint_dir ./tllm_checkpoint/llama-v2-7b-ckpt-from-meta --output_dir ./trt_engines/llama-v2-7b-engine-tp2-meta --gemm_plugin float16
|
||||
mpirun -n 2 --allow-run-as-root \
|
||||
python ../summarize.py --test_trt_llm \
|
||||
--tensorrt_llm_rouge1_threshold 18 \
|
||||
--hf_model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-7b-hf/ \
|
||||
--hf_model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-7b-hf/ \
|
||||
--data_type fp16 \
|
||||
--engine_dir ./trt_engines/llama-v2-7b-engine-tp2-meta \
|
||||
--test_hf
|
||||
@ -80,7 +80,7 @@ test_gptq() {
|
||||
python convert_checkpoint.py --model_dir ${MODEL} \
|
||||
--output_dir ./tllm_checkpoint/2gpu_gptq \
|
||||
--dtype float16 \
|
||||
--quant_ckpt_path /home/scratch.trt_llm_data/llm-models/int4-quantized-gptq-awq/llama-7b-4bit-gs128.safetensors \
|
||||
--quant_ckpt_path /home/scratch.trt_llm_data_ci/llm-models/int4-quantized-gptq-awq/llama-7b-4bit-gs128.safetensors \
|
||||
--use_weight_only \
|
||||
--weight_only_precision int4_gptq \
|
||||
--per_group \
|
||||
@ -100,8 +100,8 @@ test_gptq() {
|
||||
}
|
||||
|
||||
test_lora() {
|
||||
lora_dir=/home/scratch.trt_llm_data/llm-models/llama-models-v2/chinese-llama-2-lora-13b
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-13b-hf \
|
||||
lora_dir=/home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/chinese-llama-2-lora-13b
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-13b-hf \
|
||||
--output_dir ./tllm_checkpoint/2gpu_lora \
|
||||
--dtype float16 \
|
||||
--tp_size 2
|
||||
@ -126,7 +126,7 @@ test_lora() {
|
||||
}
|
||||
|
||||
test_mixtral() {
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/Mixtral-8x7B-v0.1/ \
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/Mixtral-8x7B-v0.1/ \
|
||||
--output_dir ./tllm_checkpoint/mixtral_2gpu \
|
||||
--dtype float16 \
|
||||
--pp_size 2 \
|
||||
@ -137,7 +137,7 @@ test_mixtral() {
|
||||
}
|
||||
|
||||
test_long_alpaca_rope_scaling() {
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data/llm-models/LongAlpaca-7B/ \
|
||||
python convert_checkpoint.py --model_dir /home/scratch.trt_llm_data_ci/llm-models/LongAlpaca-7B/ \
|
||||
--output_dir ./tllm_checkpoint/long_alpaca_tp2 \
|
||||
--dtype float16 \
|
||||
--tp_size 2
|
||||
@ -152,12 +152,12 @@ test_long_alpaca_rope_scaling() {
|
||||
--max_input_length 32768 \
|
||||
--input_file ../../tests/integration/test_input_files/pg64317_sanitized.txt \
|
||||
--engine_dir ./trt_engines/long_alpaca_tp2 \
|
||||
--tokenizer_dir /home/scratch.trt_llm_data/llm-models/LongAlpaca-7B/
|
||||
--tokenizer_dir /home/scratch.trt_llm_data_ci/llm-models/LongAlpaca-7B/
|
||||
}
|
||||
|
||||
test_llava() {
|
||||
python ../llama/convert_checkpoint.py \
|
||||
--model_dir /home/scratch.trt_llm_data/llm-models/llava-1.5-7b-hf/ \
|
||||
--model_dir /home/scratch.trt_llm_data_ci/llm-models/llava-1.5-7b-hf/ \
|
||||
--output_dir ./trt_checkpoint/llava-1gpu \
|
||||
--dtype float16
|
||||
|
||||
@ -172,7 +172,7 @@ test_llava() {
|
||||
}
|
||||
|
||||
test_bfloat16() {
|
||||
python convert_checkpoint.py --output_dir ./tllm_checkpoint/llama_v2-summarization/bfloat16/1-gpu --dtype=bfloat16 --tp_size=1 --pp_size=1 --model_dir /home/scratch.trt_llm_data/llm-models/llama-models-v2/llama-v2-7b-hf
|
||||
python convert_checkpoint.py --output_dir ./tllm_checkpoint/llama_v2-summarization/bfloat16/1-gpu --dtype=bfloat16 --tp_size=1 --pp_size=1 --model_dir /home/scratch.trt_llm_data_ci/llm-models/llama-models-v2/llama-v2-7b-hf
|
||||
}
|
||||
|
||||
test_all()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user