mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
* Update TensorRT-LLM --------- Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
21 lines
593 B
Python
21 lines
593 B
Python
import argparse
|
|
from asyncio import run
|
|
from pathlib import Path
|
|
|
|
from executor import GenerationExecutor
|
|
|
|
|
|
async def main(model_dir: Path, tokenizer: Path | str):
|
|
engine = GenerationExecutor(model_dir, tokenizer)
|
|
text = "deep learning is"
|
|
async for response in engine.generate(prompt=text, max_new_tokens=16):
|
|
print(response.text)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("model_dir", type=Path)
|
|
parser.add_argument("tokenizer", type=Path)
|
|
args = parser.parse_args()
|
|
run(main(args.model_dir, args.tokenizer))
|