TensorRT-LLMs/tests/hlapi/fake.sh
Kaiyu Xie d879430b04
Update TensorRT-LLM (#846)
* Update TensorRT-LLM

---------

Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
2024-01-09 21:03:35 +08:00

19 lines
375 B
Bash

#!/bin/bash
# Fake a 1-layer LLaMA model for CI, then copy the tokenizer files into the
# build output directory.
#
# Usage: fake.sh <hf_model_dir> <engine_dir>
#   hf_model_dir  directory containing the tokenizer* files to copy
#   engine_dir    passed to build.py as --output_dir; also the destination
#                 of the copied tokenizer* files
#
# NOTE: build.py is located via a path relative to the current working
# directory, so run this script from a directory where
# ../../examples/llama/build.py resolves.
set -eux

# Fail early with a clear message instead of invoking build.py with a
# dangling --output_dir and copying from "/tokenizer*".
if [ "$#" -lt 2 ]; then
  echo "usage: $0 <hf_model_dir> <engine_dir>" >&2
  exit 2
fi

hf_model_dir=$1
engine_dir=$2

# fake a 1-layer LLaMA model for CI
python3 ../../examples/llama/build.py \
  --use_gemm_plugin \
  --enable_context_fmha \
  --use_gpt_attention_plugin \
  --paged_kv_cache \
  --remove_input_padding \
  --n_layer 1 \
  --dtype float16 \
  --output_dir "$engine_dir"

# Quote the directory parts so paths with spaces survive; the tokenizer*
# glob is left unquoted on purpose so the shell still expands it.
cp "$hf_model_dir"/tokenizer* "$engine_dir"