mirror of https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[https://nvbugs/5754977][fix] Use free port for serve test (#10878)
Signed-off-by: Junyi Xu <219237550+JunyiXu-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
This commit is contained in:
parent 0306c0f12c
commit 2a5b8800e1
@@ -7,13 +7,14 @@ import time
 import pytest
 import requests
 import yaml
+from defs.common import get_free_port_in_ci
 from defs.conftest import llm_models_root, skip_no_hopper
 from defs.trt_test_alternative import popen, print_error, print_info
 from openai import OpenAI
 from requests.exceptions import RequestException
 
 
-def check_server_ready(http_port="8000", timeout_timer=600, sleep_interval=5):
+def check_server_ready(http_port, timeout_timer=600, sleep_interval=5):
     timer = 0
     while timer <= timeout_timer:
         try:
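The free port comes from get_free_port_in_ci, imported from defs.common; its body is not part of this diff. A minimal sketch of the usual pattern, assuming the helper simply asks the OS for an ephemeral port by binding to port 0 (the function body below is illustrative, not the actual defs.common code):

import socket


def get_free_port_in_ci() -> int:
    # Sketch only: bind to port 0 so the kernel picks an unused port,
    # then report that port number. The real helper may add retries or
    # CI-specific restrictions not shown here.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("", 0))
        return sock.getsockname()[1]

Dropping the hard-coded http_port="8000" default also forces every caller to pass the dynamically chosen port explicitly, so two serve tests on the same CI machine can no longer collide on port 8000.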
@@ -49,7 +50,7 @@ def wait_for_log(log_queue, expected_log, timeout=10):
     return False
 
 
-def check_openai_chat_completion(http_port="8000",
+def check_openai_chat_completion(http_port,
                                  model_name="TinyLlama-1.1B-Chat-v1.0"):
     """
     Test the launched trtllm-serve server using OpenAI client.
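The body of check_openai_chat_completion is not shown in this hunk; the client pattern it wraps is visible further down in test_env_overrides_pdl. A minimal sketch along those lines, assuming the server exposes the standard OpenAI-compatible /v1 routes and accepts a placeholder API key (the message content and token limit are illustrative):

from openai import OpenAI


def check_openai_chat_completion(http_port,
                                 model_name="TinyLlama-1.1B-Chat-v1.0"):
    # Sketch only: mirrors the OpenAI client usage shown in
    # test_env_overrides_pdl later in this diff.
    client = OpenAI(base_url=f"http://localhost:{http_port}/v1",
                    api_key="tensorrt_llm")
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        max_tokens=16)
    assert response.choices[0].message.content, "empty chat completion"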
@@ -127,6 +128,7 @@ def test_config_file_loading(serve_test_root, config_flag):
     assert os.path.exists(
         model_path), f"model_path does not exist: {model_path}"
 
+    port = get_free_port_in_ci()
     cmd = [
         "trtllm-serve",
         "serve",
@@ -134,7 +136,7 @@ def test_config_file_loading(serve_test_root, config_flag):
         "--host",
         "0.0.0.0",
         "--port",
-        "8000",
+        str(port),
         "--backend",
         "pytorch",
         config_flag,
@@ -143,11 +145,11 @@ def test_config_file_loading(serve_test_root, config_flag):
 
     print_info(f"Launching trtllm-serve with {config_flag}...")
     with popen(cmd):
-        check_server_ready()
+        check_server_ready(http_port=port)
         # Extract model name from the model path for consistency
         model_name = model_path.split('/')[-1]  # "Qwen3-30B-A3B-FP8"
         # Test the server with OpenAI chat completion
-        check_openai_chat_completion(model_name=model_name)
+        check_openai_chat_completion(http_port=port, model_name=model_name)
 
 
 @skip_no_hopper
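check_server_ready is only partially visible in the first hunk (the timer loop and try block). A minimal sketch of such a readiness poll, assuming the server is probed over HTTP on the chosen port; the /health endpoint and error handling below are assumptions, not the actual test code:

import time

import requests
from requests.exceptions import RequestException


def check_server_ready(http_port, timeout_timer=600, sleep_interval=5):
    # Sketch only: poll the freshly launched trtllm-serve process until it
    # answers, giving up after timeout_timer seconds.
    timer = 0
    while timer <= timeout_timer:
        try:
            if requests.get(f"http://localhost:{http_port}/health",
                            timeout=5).status_code == 200:
                return
        except RequestException:
            pass  # server not accepting connections yet
        time.sleep(sleep_interval)
        timer += sleep_interval
    raise RuntimeError(
        f"Server on port {http_port} did not become ready "
        f"within {timeout_timer} seconds")

Because the port is now chosen at runtime, it has to be threaded through as http_port=port at each call site, which is exactly what the hunks above change.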
@@ -170,7 +172,7 @@ def test_env_overrides_pdl(tmp_path):
         }
     }))
 
-    port = 8001
+    port = get_free_port_in_ci()
    env = os.environ.copy()
     # Pre-load with 0 to verify override works
     env.update({
@@ -198,7 +200,7 @@ def test_env_overrides_pdl(tmp_path):
         [output_queue.put(line) for line in iter(proc.stdout.readline, '')],
         daemon=True).start()
 
-    check_server_ready(http_port=str(port), timeout_timer=300)
+    check_server_ready(http_port=port, timeout_timer=300)
     response = OpenAI(base_url=f"http://localhost:{port}/v1",
                       api_key="tensorrt_llm").chat.completions.create(
                           model="TinyLlama-1.1B-Chat-v1.0",
@@ -227,7 +227,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B_Instruct_RocketKV::test_auto_d
 unittest/_torch/sampler/test_return_logits.py SKIP (https://nvbugs/5764627)
 unittest/_torch/sampler/test_logits_logprobs.py::test_generate_with_return_logits SKIP (https://nvbugs/5764627)
 unittest/_torch/sampler/test_logits_logprobs.py::test_generate_async_with_return_logits SKIP (https://nvbugs/5764627)
-examples/serve/test_serve.py::test_config_file_loading[--config] SKIP (https://nvbugs/5754977)
 examples/test_ray.py::test_ray_disaggregated_serving[tp2] SKIP (https://nvbugs/5612502)
 unittest/executor/test_rpc_proxy.py SKIP (https://nvbugs/5605741)
 unittest/executor/test_rpc_worker.py SKIP (https://nvbugs/5605741)