[https://nvbugs/5747938][fix] Use local tokenizer (#10230)

Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
authored by Pengyun Lin on 2025-12-26 22:08:10 +08:00; committed by GitHub
parent c5b0f9e436
commit 684b37df02
2 changed files with 8 additions and 7 deletions

aiperf_client.sh

@@ -2,7 +2,7 @@
 aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
-    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+    --tokenizer ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
     --endpoint-type chat \
     --random-seed 123 \
     --synthetic-input-tokens-mean 128 \
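
For context, ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} is bash's default-value expansion: it yields the variable's value when it is set and non-empty, and falls back to the Hugging Face Hub ID otherwise, so existing callers need no changes. A usage sketch (the local directory path is illustrative):

# Default: aiperf resolves the tokenizer by Hub ID
bash aiperf_client.sh

# Offline run: point aiperf at an already-downloaded tokenizer directory
AIPERF_TOKENIZER_PATH=/models/TinyLlama-1.1B-Chat-v1.0 bash aiperf_client.sh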

trtllm-serve examples test (Python)

@ -1,17 +1,14 @@
import json
import os
import subprocess
import sys
import tempfile
import pytest
import yaml
from ..test_llm import get_model_path
from .openai_server import RemoteOpenAIServer
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from test_llm import get_model_path
@pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
def model_name():
@@ -57,15 +54,19 @@ def example_root():
     ("bash", "curl_completion_client.sh"),
     ("bash", "aiperf_client.sh"),
     ("bash", "curl_responses_client.sh")])
-def test_trtllm_serve_examples(exe: str, script: str,
+def test_trtllm_serve_examples(exe: str, script: str, model_name: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)
     # CalledProcessError will be raised if any errors occur
+    custom_env = os.environ.copy()
+    if script.startswith("aiperf"):
+        custom_env["AIPERF_TOKENIZER_PATH"] = get_model_path(model_name)
     result = subprocess.run([exe, client_script],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             text=True,
-                            check=True)
+                            check=True,
+                            env=custom_env)
     if script.startswith("curl"):
         # For curl scripts, we expect a JSON response
         result_stdout = result.stdout.strip()
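
The test change follows the standard environment-override pattern for subprocess: copy os.environ so the child keeps every inherited variable (passing a fresh dict as env= would drop PATH and everything else), set the one override, and hand the mapping to subprocess.run. A minimal, self-contained sketch of the same pattern (the tokenizer path is illustrative):

import os
import subprocess

custom_env = os.environ.copy()  # child inherits the full parent environment
custom_env["AIPERF_TOKENIZER_PATH"] = "/models/TinyLlama-1.1B-Chat-v1.0"  # illustrative local path

result = subprocess.run(
    ["bash", "-c", 'echo "tokenizer: $AIPERF_TOKENIZER_PATH"'],
    stdout=subprocess.PIPE,
    text=True,
    check=True,  # raise CalledProcessError on a non-zero exit code
    env=custom_env,
)
print(result.stdout.strip())  # tokenizer: /models/TinyLlama-1.1B-Chat-v1.0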