[https://nvbugs/5747938][fix] Use local tokenizer (#10230)

Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
authored by Pengyun Lin on 2025-12-26 22:08:10 +08:00; committed by GitHub
parent c5b0f9e436
commit 684b37df02
2 changed files with 8 additions and 7 deletions

aiperf_client.sh

@@ -2,7 +2,7 @@
 aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
-    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+    --tokenizer ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
     --endpoint-type chat \
     --random-seed 123 \
     --synthetic-input-tokens-mean 128 \
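
For context, ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} is bash's default-value expansion: it yields the variable's value when it is set and non-empty, and falls back to the Hugging Face Hub ID otherwise, so existing callers need no changes. A usage sketch (the local directory path is illustrative):

# Default: aiperf resolves the tokenizer by Hub ID
bash aiperf_client.sh

# Offline run: point aiperf at an already-downloaded tokenizer directory
AIPERF_TOKENIZER_PATH=/models/TinyLlama-1.1B-Chat-v1.0 bash aiperf_client.sh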

trtllm-serve examples test (Python)

@ -1,17 +1,14 @@
import json
import os
import subprocess
import sys
import tempfile
import pytest
import yaml
from ..test_llm import get_model_path
from .openai_server import RemoteOpenAIServer
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from test_llm import get_model_path
@pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
def model_name():
@@ -57,15 +54,19 @@ def example_root():
     ("bash", "curl_completion_client.sh"),
     ("bash", "aiperf_client.sh"),
     ("bash", "curl_responses_client.sh")])
-def test_trtllm_serve_examples(exe: str, script: str,
+def test_trtllm_serve_examples(exe: str, script: str, model_name: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)
     # CalledProcessError will be raised if any errors occur
+    custom_env = os.environ.copy()
+    if script.startswith("aiperf"):
+        custom_env["AIPERF_TOKENIZER_PATH"] = get_model_path(model_name)
     result = subprocess.run([exe, client_script],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             text=True,
-                            check=True)
+                            check=True,
+                            env=custom_env)
     if script.startswith("curl"):
         # For curl scripts, we expect a JSON response
         result_stdout = result.stdout.strip()
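
The test change follows the standard environment-override pattern for subprocess: copy os.environ so the child keeps every inherited variable (passing a fresh dict as env= would drop PATH and everything else), set the one override, and hand the mapping to subprocess.run. A minimal, self-contained sketch of the same pattern (the tokenizer path is illustrative):

import os
import subprocess

custom_env = os.environ.copy()  # child inherits the full parent environment
custom_env["AIPERF_TOKENIZER_PATH"] = "/models/TinyLlama-1.1B-Chat-v1.0"  # illustrative local path

result = subprocess.run(
    ["bash", "-c", 'echo "tokenizer: $AIPERF_TOKENIZER_PATH"'],
    stdout=subprocess.PIPE,
    text=True,
    check=True,  # raise CalledProcessError on a non-zero exit code
    env=custom_env,
)
print(result.stdout.strip())  # tokenizer: /models/TinyLlama-1.1B-Chat-v1.0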