TensorRT-LLMs/examples/server/async.py
import argparse
from asyncio import run
from pathlib import Path

from executor import GenerationExecutor


async def main(model_dir: Path, tokenizer: Path | str):
    # Build an executor bound to the compiled engine directory and tokenizer.
    engine = GenerationExecutor(model_dir, tokenizer)

    text = "deep learning is"
    # Consume the generation as an async stream, printing partial results
    # as they arrive instead of waiting for the full completion.
    async for response in engine.generate(prompt=text, max_new_tokens=16):
        print(response.text)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("model_dir", type=Path)
    parser.add_argument("tokenizer", type=Path)
    args = parser.parse_args()

    run(main(args.model_dir, args.tokenizer))
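The script is driven by its two positional arguments, so a typical invocation is `python async.py <engine_dir> <tokenizer_path>`, where both paths are placeholders for your own build artifacts. Because `engine.generate` is consumed with `async for` above, the same pattern extends naturally to streaming several prompts concurrently. The following is a minimal sketch, not part of the original example: it reuses only the names already present in this file (`GenerationExecutor`, `generate`, `response.text`) and assumes the executor can serve multiple in-flight requests from separate asyncio tasks.

# Sketch (assumption: the GenerationExecutor used above accepts
# concurrent generate() calls from separate asyncio tasks).
import asyncio
from pathlib import Path

from executor import GenerationExecutor


async def stream_one(engine: GenerationExecutor, prompt: str) -> None:
    # Each call streams partial responses for its own prompt.
    async for response in engine.generate(prompt=prompt, max_new_tokens=16):
        print(f"{prompt!r} -> {response.text}")


async def main_many(model_dir: Path, tokenizer: Path) -> None:
    engine = GenerationExecutor(model_dir, tokenizer)
    prompts = ["deep learning is", "the capital of France is"]
    # Drive all prompt streams concurrently on the same executor.
    await asyncio.gather(*(stream_one(engine, p) for p in prompts))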