mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* Update TensorRT-LLM --------- Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
19 lines
375 B
Bash
19 lines
375 B
Bash
#!/bin/bash
#
# Build a 1-layer LLaMA engine for CI and copy the tokenizer files next to it.
#
# Usage: <this script> <hf_model_dir> <engine_dir>
#   hf_model_dir  directory whose tokenizer* files are copied into engine_dir
#   engine_dir    passed to build.py as --output_dir; also receives the
#                 tokenizer files
#
# NOTE: build.py is referenced by a path relative to the current working
# directory (../../examples/llama/build.py), so the script must be invoked
# from a directory where that relative path resolves.

# -e: abort on the first failing command; -x: trace each command as it runs.
set -ex

# Fail fast with a usage message when an argument is missing or empty.
# Without this, an empty $1 would make the cp below glob "/tokenizer*" at the
# filesystem root, and an empty $2 would leave --output_dir without a value.
hf_model_dir=${1:?usage: $0 <hf_model_dir> <engine_dir>}
engine_dir=${2:?usage: $0 <hf_model_dir> <engine_dir>}

# fake a 1-layer LLaMA model for CI
python3 ../../examples/llama/build.py \
  --use_gemm_plugin \
  --enable_context_fmha \
  --use_gpt_attention_plugin \
  --paged_kv_cache \
  --remove_input_padding \
  --n_layer 1 \
  --dtype float16 \
  --output_dir "$engine_dir"

# Quote the directory parts so paths containing spaces survive word-splitting;
# the tokenizer* glob stays outside the quotes so it still expands.
# "--" stops option parsing in case a path begins with "-".
cp -- "$hf_model_dir"/tokenizer* "$engine_dir"