TensorRT-LLMs/tests/llmapi/fake.sh
石晓伟 8f91cff22e
TensorRT-LLM Release 0.15.0 (#2529)
Co-authored-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2024-12-04 13:44:56 +08:00

19 lines
375 B
Bash

#!/bin/bash
# Fake a 1-layer LLaMA model for CI, then copy the tokenizer files next to it.
#
# Usage: fake.sh <hf_model_dir> <engine_dir>
#   hf_model_dir  directory holding the tokenizer* files; in this script it is
#                 only used as the source of that copy.
#   engine_dir    passed to build.py as --output_dir and used as the
#                 destination for the tokenizer files.
#
# NOTE: build.py is referenced relative to the current working directory, so
# the script must be invoked from a directory where
# ../../examples/llama/build.py resolves.
set -ex

# Fail fast with a usage hint instead of running the build with an empty
# --output_dir or globbing /tokenizer* at the filesystem root.
hf_model_dir=${1:?usage: fake.sh <hf_model_dir> <engine_dir>}
engine_dir=${2:?usage: fake.sh <hf_model_dir> <engine_dir>}

# --n_layer 1 keeps the model at a single layer for the CI fake.
python3 ../../examples/llama/build.py \
  --use_gemm_plugin \
  --enable_context_fmha \
  --use_gpt_attention_plugin \
  --paged_kv_cache \
  --remove_input_padding \
  --n_layer 1 \
  --dtype float16 \
  --output_dir "$engine_dir"

# Quote the directory parts so paths with spaces survive; the tokenizer* glob
# stays outside the quotes so it still expands. `--` stops option parsing in
# case a path begins with '-'.
cp -- "$hf_model_dir"/tokenizer* "$engine_dir"