[None][fix] Switch LLM API quickstart example location per workflow. (#7182)

Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
Guoming Zhang 2025-08-25 10:17:20 +08:00 committed by GitHub
parent 030598a497
commit 01c5f2f233
2 changed files with 10 additions and 10 deletions


@@ -1,11 +1,17 @@
-from tensorrt_llm import LLM, SamplingParams
+from tensorrt_llm import BuildConfig, SamplingParams
+from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change


 def main():
+    build_config = BuildConfig()
+    build_config.max_batch_size = 256
+    build_config.max_num_tokens = 1024
+
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+              build_config=build_config)

     # Sample prompts.
     prompts = [
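
For readers following along outside the diff view, here is a minimal runnable sketch of what this hunk turns the example into. Only the imports, the BuildConfig setup, and the LLM construction come from this commit; the prompts, the SamplingParams values, and the generation loop are assumed from the usual LLM API quickstart pattern, since the hunk is truncated at prompts = [.

from tensorrt_llm import BuildConfig, SamplingParams
from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change


def main():
    # Engine build limits for the TensorRT workflow (values from this commit).
    build_config = BuildConfig()
    build_config.max_batch_size = 256
    build_config.max_num_tokens = 1024

    # Model could accept HF model name, a path to local HF model,
    # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
              build_config=build_config)

    # Sample prompts (assumed continuation beyond the truncated hunk).
    prompts = [
        "Hello, my name is",
        "The capital of France is",
    ]
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Generate completions and print them.
    for output in llm.generate(prompts, sampling_params):
        print(f"Prompt: {output.prompt!r}, Generated: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()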


@@ -1,17 +1,11 @@
-from tensorrt_llm import BuildConfig, SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change
+from tensorrt_llm import LLM, SamplingParams


 def main():
-    build_config = BuildConfig()
-    build_config.max_batch_size = 256
-    build_config.max_num_tokens = 1024
-
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-              build_config=build_config)
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

     # Sample prompts.
     prompts = [
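
And the counterpart this second hunk leaves behind: the default tensorrt_llm.LLM (the PyTorch workflow) needs no BuildConfig, so the quickstart reduces to the plain constructor. As above, everything past prompts = [ is an assumed continuation of the truncated hunk, following the standard quickstart pattern.

from tensorrt_llm import LLM, SamplingParams


def main():
    # Model could accept HF model name, a path to local HF model,
    # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

    # Sample prompts (assumed continuation beyond the truncated hunk).
    prompts = [
        "Hello, my name is",
        "The capital of France is",
    ]
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    for output in llm.generate(prompts, sampling_params):
        print(f"Prompt: {output.prompt!r}, Generated: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()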