TensorRT-LLMs/tests/hlapi/fake.sh

#!/bin/bash
#
# Fake a 1-layer LLaMA engine for CI.
#
# Usage: fake.sh <hf_model_dir> <engine_dir>
#   hf_model_dir  directory holding the tokenizer* files to copy
#   engine_dir    directory passed to build.py as --output_dir; the
#                 tokenizer files are copied here as well
#
# NOTE: build.py is referenced via a relative path, so the script must be
# started from a working directory where ../../examples/llama/build.py
# resolves.
#
# -e: abort on the first failing command, -u: reject unset variables,
# -x: trace every command into the CI log.
set -eux

# Fail early with a usage hint instead of handing empty paths to
# build.py / cp.
if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
    printf 'Usage: %s <hf_model_dir> <engine_dir>\n' "${0##*/}" >&2
    exit 2
fi

hf_model_dir=$1
engine_dir=$2

# fake a 1-layer LLaMA model for CI
python3 ../../examples/llama/build.py \
    --use_gemm_plugin \
    --enable_context_fmha \
    --use_gpt_attention_plugin \
    --paged_kv_cache \
    --remove_input_padding \
    --n_layer 1 \
    --dtype float16 \
    --output_dir "$engine_dir"

# Place the tokenizer files next to the built engine. The variables are
# quoted so paths with spaces survive; the glob stays outside the quotes so
# it still expands. `--` stops cp from treating a leading '-' as an option.
cp -- "$hf_model_dir"/tokenizer* "$engine_dir"