TensorRT-LLM/examples/high-level-api/run_examples.sh
Kaiyu Xie 4bb65f216f
Update TensorRT-LLM (#1274)
* Update TensorRT-LLM

---------

Co-authored-by: meghagarwal <16129366+megha95@users.noreply.github.com>
Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
2024-03-12 18:15:52 +08:00

39 lines
1.0 KiB
Bash
Executable File

#!/bin/bash
# Smoke-test the high-level API examples in llm_examples.py against a
# Llama HuggingFace checkpoint.
#
# Usage: run_examples.sh <hf_llama_model_dir> [engine_dump_dir]
#   $1 - path to the HuggingFace Llama model directory (required)
#   $2 - directory to dump/reuse the built TRT-LLM engine
#        (default: ./tllm.engine.example)
set -ex

PROMPT="Tell a story"
# Fail fast with a usage message if the required model dir is missing,
# instead of silently passing an empty --hf_model_dir downstream.
LLAMA_MODEL_DIR="${1:?usage: $0 <hf_llama_model_dir> [engine_dump_dir]}"
default_engine_dir="./tllm.engine.example"
TMP_ENGINE_DIR="${2:-$default_engine_dir}"

# Build an engine from the HF checkpoint and dump it for reuse below.
python3 llm_examples.py --task run_llm_from_huggingface_model \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --dump_engine_dir="$TMP_ENGINE_DIR"

# TP enabled
python3 llm_examples.py --task run_llm_from_huggingface_model \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --tp_size=2

# Load the engine dumped by the first run instead of rebuilding.
python3 llm_examples.py --task run_llm_from_tllm_engine \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --dump_engine_dir="$TMP_ENGINE_DIR"

python3 llm_examples.py --task run_llm_generate_async_example \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR"

# Both TP and streaming enabled
python3 llm_examples.py --task run_llm_generate_async_example \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --streaming \
  --tp_size=2

python3 llm_examples.py --task run_llm_with_async_future \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR"