mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
[https://nvbugs/5747938][fix] Use local tokenizer (#10230)
Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
This commit is contained in:
parent
c5b0f9e436
commit
684b37df02
@ -2,7 +2,7 @@
|
||||
|
||||
aiperf profile \
|
||||
-m TinyLlama-1.1B-Chat-v1.0 \
|
||||
--tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
|
||||
--tokenizer ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
|
||||
--endpoint-type chat \
|
||||
--random-seed 123 \
|
||||
--synthetic-input-tokens-mean 128 \
|
||||
|
||||
@ -1,17 +1,14 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from ..test_llm import get_model_path
|
||||
from .openai_server import RemoteOpenAIServer
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
||||
from test_llm import get_model_path
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
|
||||
def model_name():
|
||||
@ -57,15 +54,19 @@ def example_root():
|
||||
("bash", "curl_completion_client.sh"),
|
||||
("bash", "aiperf_client.sh"),
|
||||
("bash", "curl_responses_client.sh")])
|
||||
def test_trtllm_serve_examples(exe: str, script: str,
|
||||
def test_trtllm_serve_examples(exe: str, script: str, model_name: str,
|
||||
server: RemoteOpenAIServer, example_root: str):
|
||||
client_script = os.path.join(example_root, script)
|
||||
# CalledProcessError will be raised if any errors occur
|
||||
custom_env = os.environ.copy()
|
||||
if script.startswith("aiperf"):
|
||||
custom_env["AIPERF_TOKENIZER_PATH"] = get_model_path(model_name)
|
||||
result = subprocess.run([exe, client_script],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True)
|
||||
check=True,
|
||||
env=custom_env)
|
||||
if script.startswith("curl"):
|
||||
# For curl scripts, we expect a JSON response
|
||||
result_stdout = result.stdout.strip()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user