diff --git a/examples/serve/aiperf_client.sh b/examples/serve/aiperf_client.sh
index 8a150714de..d901f99cde 100755
--- a/examples/serve/aiperf_client.sh
+++ b/examples/serve/aiperf_client.sh
@@ -2,7 +2,7 @@
 
 aiperf profile \
     -m TinyLlama-1.1B-Chat-v1.0 \
-    --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+    --tokenizer ${AIPERF_TOKENIZER_PATH:-TinyLlama/TinyLlama-1.1B-Chat-v1.0} \
     --endpoint-type chat \
     --random-seed 123 \
     --synthetic-input-tokens-mean 128 \
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index 66677fcead..4b75e4c71f 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -1,17 +1,14 @@
 import json
 import os
 import subprocess
-import sys
 import tempfile
 
 import pytest
 import yaml
 
+from ..test_llm import get_model_path
 from .openai_server import RemoteOpenAIServer
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-from test_llm import get_model_path
-
 
 @pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"])
 def model_name():
@@ -57,15 +54,19 @@ def example_root():
                              ("bash", "curl_completion_client.sh"),
                              ("bash", "aiperf_client.sh"),
                              ("bash", "curl_responses_client.sh")])
-def test_trtllm_serve_examples(exe: str, script: str,
+def test_trtllm_serve_examples(exe: str, script: str, model_name: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)
     # CalledProcessError will be raised if any errors occur
+    custom_env = os.environ.copy()
+    if script.startswith("aiperf"):
+        custom_env["AIPERF_TOKENIZER_PATH"] = get_model_path(model_name)
     result = subprocess.run([exe, client_script],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             text=True,
-                            check=True)
+                            check=True,
+                            env=custom_env)
     if script.startswith("curl"):
         # For curl scripts, we expect a JSON response
         result_stdout = result.stdout.strip()
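
With this change, aiperf_client.sh reads its tokenizer from the AIPERF_TOKENIZER_PATH environment variable and falls back to the Hub repo TinyLlama/TinyLlama-1.1B-Chat-v1.0 when the variable is unset; the test sets that variable to the locally cached model returned by get_model_path so the client does not hit the network. A minimal sketch of running the script standalone against a local tokenizer (the path below is a hypothetical local checkout, not something from this diff):

    # Assumption: the TinyLlama checkpoint is already downloaded to a local directory.
    # Point aiperf at it instead of resolving the tokenizer from the Hub.
    AIPERF_TOKENIZER_PATH=/path/to/TinyLlama-1.1B-Chat-v1.0 \
        bash examples/serve/aiperf_client.sh

If AIPERF_TOKENIZER_PATH is left unset, the script behaves exactly as before the change.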