mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-19 17:25:17 +08:00
[None][chore] Enable multiple postprocess workers tests for chat completions api (#7602)
Signed-off-by: Junyi Xu <219237550+JunyiXu-nv@users.noreply.github.com>
This commit is contained in:
parent
b69e3e9f99
commit
a2c45d82c3
@ -14,10 +14,18 @@ def model():
|
||||
return "gpt_oss/gpt-oss-20b/"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module",
                params=[0, 2],
                ids=["disable_processpool", "enable_processpool"])
def num_postprocess_workers(request):
    """Parametrized fixture giving the number of postprocess workers.

    Yields 0 (process pool disabled) and 2 (process pool enabled) so each
    dependent test runs once per configuration. Module-scoped so the server
    fixture below is launched only once per parameter value.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def server(model: str, num_postprocess_workers: int):
    """Launch a RemoteOpenAIServer for the given model and yield it.

    The scraped diff contained both the pre- and post-patch definitions of
    this fixture; this is the resolved post-patch version, which forwards
    ``--num_postprocess_workers`` so the server is exercised with and
    without the postprocess worker pool (see num_postprocess_workers).
    Module scope keeps one server per (model, worker-count) combination.
    """
    model_path = get_model_path(model)
    args = ["--num_postprocess_workers", f"{num_postprocess_workers}"]
    # Context manager guarantees the remote server process is torn down
    # after the module's tests finish.
    with RemoteOpenAIServer(model_path, args) as remote_server:
        yield remote_server
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user