mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com> Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com> Signed-off-by: ruodil <200874449+ruodil@users.noreply.github.com> Signed-off-by: Yiqing Yan <yiqingy@nvidia.com> Signed-off-by: Yanchao Lu <yanchaol@nvidia.com> Signed-off-by: Balaram Buddharaju <169953907+brb-nv@users.noreply.github.com> Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Signed-off-by: Bo Deng <deemod@nvidia.com> Signed-off-by: Chang Liu <9713593+chang-l@users.noreply.github.com> Signed-off-by: Stefan Niebler <82932102+stnie@users.noreply.github.com> Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com> Signed-off-by: qqiao <qqiao@nvidia.com> Signed-off-by: yechank <161688079+yechank-nvidia@users.noreply.github.com> Signed-off-by: William Zhang <133824995+2ez4bz@users.noreply.github.com> Signed-off-by: raayandhar <rdhar@nvidia.com> Co-authored-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com> Co-authored-by: ruodil <200874449+ruodil@users.noreply.github.com> Co-authored-by: Yiqing Yan <yiqingy@nvidia.com> Co-authored-by: Yanchao Lu <yanchaol@nvidia.com> Co-authored-by: brb-nv <169953907+brb-nv@users.noreply.github.com> Co-authored-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com> Co-authored-by: Larry <197874197+LarryXFly@users.noreply.github.com> Co-authored-by: Bo Deng <deemod@nvidia.com> Co-authored-by: Guoming Zhang <137257613+nv-guomingz@users.noreply.github.com> Co-authored-by: Stefan Niebler <82932102+stnie@users.noreply.github.com> Co-authored-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Co-authored-by: Yan Chunwei <328693+Superjomn@users.noreply.github.com> Co-authored-by: Emma Qiao <qqiao@nvidia.com> Co-authored-by: Yechan Kim <161688079+yechank-nvidia@users.noreply.github.com> Co-authored-by: 2ez4bz 
<133824995+2ez4bz@users.noreply.github.com> Co-authored-by: Raayan Dhar <58057652+raayandhar@users.noreply.github.com> Co-authored-by: Zhanrui Sun <184402041+ZhanruiSunCh@users.noreply.github.com>
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
from typing import Optional
|
|
|
|
import click
|
|
|
|
from tensorrt_llm._tensorrt_engine import LLM as TrtLLM
|
|
from tensorrt_llm.llmapi import LLM, BuildConfig, SamplingParams
|
|
|
|
|
|
@click.command()
@click.option("--model_dir", type=str, required=True)
@click.option("--tp_size", type=int, default=1)
@click.option("--engine_dir", type=str, default=None)
@click.option("--backend", type=str, default=None)
def main(model_dir: str, tp_size: int, engine_dir: Optional[str],
         backend: Optional[str]):
    """Load a model with the chosen backend and run one short generation.

    Args:
        model_dir: Model location, passed as the first argument to the LLM
            constructor.
        tp_size: Forwarded to the LLM constructor as
            ``tensor_parallel_size``.
        engine_dir: Optional output directory. When given and it resolves to
            a different absolute path than ``model_dir``, ``llm.save`` is
            called with it.
        backend: ``"pytorch"`` or ``"tensorrt"``. Falls back to
            ``"tensorrt"`` when omitted or empty.

    Raises:
        click.BadParameter: If ``backend`` is not one of the two supported
            values.
    """
    build_config = BuildConfig()
    build_config.max_batch_size = 8
    build_config.max_input_len = 256
    build_config.max_seq_len = 512

    backend = backend or "tensorrt"
    # Validate with an explicit raise instead of ``assert``: assertions are
    # removed under ``python -O``, which would let an unknown backend fall
    # through to the non-TensorRT branch below without any error.
    if backend not in ("pytorch", "tensorrt"):
        raise click.BadParameter(
            f"expected 'pytorch' or 'tensorrt', got {backend!r}",
            param_hint="--backend")

    llm_cls = TrtLLM if backend == "tensorrt" else LLM

    # The build config is only passed along for the TensorRT backend.
    kwargs = {} if backend == "pytorch" else {"build_config": build_config}

    llm = llm_cls(model_dir, tensor_parallel_size=tp_size, **kwargs)

    # Skip saving when the target directory is the model directory itself.
    if engine_dir is not None and os.path.abspath(
            engine_dir) != os.path.abspath(model_dir):
        llm.save(engine_dir)

    # NOTE(review): end_id=-1 presumably prevents stopping on an end token so
    # that exactly max_tokens are produced - confirm against SamplingParams.
    sampling_params = SamplingParams(max_tokens=10, end_id=-1)

    # The prompt is supplied directly as token ids rather than as text.
    prompt_token_ids = [45, 12, 13]
    for output in llm.generate([prompt_token_ids],
                               sampling_params=sampling_params):
        print(output)
|
|
|
|
|
|
# Script entry point: run the click command only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|