Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-27 14:13:34 +08:00
* Update TensorRT-LLM --------- Co-authored-by: meghagarwal <16129366+megha95@users.noreply.github.com> Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
39 lines
1.0 KiB
Bash
Executable File
#!/bin/bash
# Run the TensorRT-LLM high-level API examples (llm_examples.py) against a
# LLaMA model, covering: direct HF-model run, tensor-parallel run, reload of
# a dumped engine, async generation, streaming + TP, and async futures.
#
# Usage: $0 <hf_model_dir> [engine_dump_dir]
#   $1 - path to the HuggingFace LLaMA model directory (required)
#   $2 - directory to dump/reload the built TRT engine
#        (default: ./tllm.engine.example)

# -e: abort on first failing example; -u: catch unset vars;
# pipefail: don't mask failures in pipelines; -x: trace commands (as before).
set -euo pipefail
set -x

PROMPT="Tell a story"

# Fail fast with a usage message instead of running every example
# against an empty model path.
LLAMA_MODEL_DIR="${1:?usage: $0 <hf_model_dir> [engine_dump_dir]}"

default_engine_dir="./tllm.engine.example"
TMP_ENGINE_DIR="${2:-$default_engine_dir}"

# Build from the HF checkpoint and dump the engine for reuse below.
python3 llm_examples.py --task run_llm_from_huggingface_model \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --dump_engine_dir="$TMP_ENGINE_DIR"

# TP enabled
python3 llm_examples.py --task run_llm_from_huggingface_model \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --tp_size=2

# Reload the engine dumped by the first run.
python3 llm_examples.py --task run_llm_from_tllm_engine \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --dump_engine_dir="$TMP_ENGINE_DIR"

python3 llm_examples.py --task run_llm_generate_async_example \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR"

# Both TP and streaming enabled
python3 llm_examples.py --task run_llm_generate_async_example \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR" \
  --streaming \
  --tp_size=2

python3 llm_examples.py --task run_llm_with_async_future \
  --prompt="$PROMPT" \
  --hf_model_dir="$LLAMA_MODEL_DIR"