TensorRT-LLMs/examples/high-level-api/run_examples.sh
#!/bin/bash
# Exit on the first error and echo each command as it runs.
set -ex

PROMPT="Tell a story"
# First positional argument: path to a Hugging Face LLaMA model directory.
LLAMA_MODEL_DIR=$1
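
# Example invocation (the path below is illustrative):
#   ./run_examples.sh /path/to/llama-hf-model
# Fail fast with a usage message if no model directory was supplied.
if [ -z "$LLAMA_MODEL_DIR" ]; then
    echo "Usage: $0 <llama_hf_model_dir>" >&2
    exit 1
fi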

# Build an engine from the Hugging Face checkpoint, dump it, and run generation.
python3 llm_examples.py --task run_llm_from_huggingface_model \
    --prompt="$PROMPT" \
    --hf_model_dir="$LLAMA_MODEL_DIR" \
    --dump_engine_dir=./tllm.engine.example

# Reuse the engine dumped by the previous step instead of rebuilding it.
python3 llm_examples.py --task run_llm_from_tllm_engine \
    --prompt="$PROMPT" \
    --hf_model_dir="$LLAMA_MODEL_DIR" \
    --dump_engine_dir=./tllm.engine.example

# Run the same prompt with tensor parallelism.
python3 llm_examples.py --task run_llm_on_tensor_parallel \
    --prompt="$PROMPT" \
    --hf_model_dir="$LLAMA_MODEL_DIR"

# Demonstrate the asynchronous generate API.
python3 llm_examples.py --task run_llm_generate_async_example \
    --prompt="$PROMPT" \
    --hf_model_dir="$LLAMA_MODEL_DIR"

# Build and run a quantized engine (INT4 AWQ).
python3 llm_examples.py --task run_llm_with_quantization \
    --prompt="$PROMPT" \
    --hf_model_dir="$LLAMA_MODEL_DIR" \
    --dump_engine_dir=./tllm.engine.example \
    --quant_type="int4_awq"