Mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[None][fix] Switch llm api quickstart example location per workflow. (#7182)
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
parent 030598a497
commit 01c5f2f233
@@ -1,11 +1,17 @@
-from tensorrt_llm import LLM, SamplingParams
+from tensorrt_llm import BuildConfig, SamplingParams
+from tensorrt_llm._tensorrt_engine import LLM # NOTE the change
 
 
 def main():
 
+    build_config = BuildConfig()
+    build_config.max_batch_size = 256
+    build_config.max_num_tokens = 1024
+
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+              build_config=build_config)
 
     # Sample prompts.
     prompts = [
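For context, here is a runnable sketch of the whole file this hunk produces. The diff only shows the top of the file, so everything below the truncated prompts list (the prompt strings, the SamplingParams values, and the generate loop) is an assumption based on the usual LLM-API quickstart pattern, not part of this commit:

from tensorrt_llm import BuildConfig, SamplingParams
from tensorrt_llm._tensorrt_engine import LLM # NOTE the change


def main():
    # Cap the engine's batch size and total in-flight tokens at build time.
    build_config = BuildConfig()
    build_config.max_batch_size = 256
    build_config.max_num_tokens = 1024

    # Model could accept HF model name, a path to local HF model,
    # or TensorRT Model Optimizer's quantized checkpoints like
    # nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
              build_config=build_config)

    # Sample prompts (illustrative; the originals are cut off in the diff).
    prompts = [
        "Hello, my name is",
        "The capital of France is",
    ]

    # Illustrative decoding settings.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # generate() returns one output object per prompt.
    for output in llm.generate(prompts, sampling_params):
        print(f"Prompt: {output.prompt!r}, Generated: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()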
@@ -1,17 +1,11 @@
-from tensorrt_llm import BuildConfig, SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM # NOTE the change
+from tensorrt_llm import LLM, SamplingParams
 
 
 def main():
 
-    build_config = BuildConfig()
-    build_config.max_batch_size = 256
-    build_config.max_num_tokens = 1024
-
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-              build_config=build_config)
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 
     # Sample prompts.
     prompts = [
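The second hunk is the mirror image of the first: that file goes back to the default top-level import, where no BuildConfig is needed. A minimal sketch of that variant, with the same hedged generate section as above:

from tensorrt_llm import LLM, SamplingParams


def main():
    # The default LLM takes the model directly; no build_config is required.
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

    prompts = ["Hello, my name is"]
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    for output in llm.generate(prompts, sampling_params):
        print(output.outputs[0].text)


if __name__ == "__main__":
    main()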