Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-02-16 15:55:08 +08:00
[https://nvbugs/5804146][fix] Enable responses tests and remove ds to… (#10925)
Signed-off-by: Junyi Xu <219237550+JunyiXu-nv@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
parent 196d94a419
commit 90ea6c1e09
@@ -227,7 +227,6 @@ examples/test_ray.py::test_ray_disaggregated_serving[tp2] SKIP (https://nvbugs/5
unittest/executor/test_rpc_proxy.py SKIP (https://nvbugs/5605741)
unittest/executor/test_rpc_worker.py SKIP (https://nvbugs/5605741)
cpp/test_e2e.py::test_model[-redrafter-86] SKIP (https://nvbugs/5761642)
test_e2e.py::test_openai_responses SKIP (https://nvbugs/5804146)
triton_server/test_triton.py::test_gpt_gather_logits[gpt-gather-logits] SKIP (https://nvbugs/5766960)
full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5596337)
test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010)
@@ -13,10 +13,7 @@ pytestmark = pytest.mark.threadleak(enabled=False)


@pytest.fixture(scope="module",
                params=[
                    "gpt_oss/gpt-oss-20b", "DeepSeek-R1-Distill-Qwen-1.5B",
                    "Qwen3/Qwen3-0.6B"
                ])
                params=["gpt_oss/gpt-oss-20b", "Qwen3/Qwen3-0.6B"])
def model(request):
    return request.param
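Not part of the diff: a minimal, self-contained sketch of the fixture pattern used in the hunk above. A module-scoped parametrized fixture makes every test that accepts `model` run once per entry in `params`, and the value is reused across the module. The dummy test below is hypothetical, added only to show how the parametrization fans out.

import pytest

# Module-scoped fixture: each param produces one run of every dependent test,
# and the value is shared across the whole test module.
@pytest.fixture(scope="module",
                params=["gpt_oss/gpt-oss-20b", "Qwen3/Qwen3-0.6B"])
def model(request):
    return request.param


def test_model_name_is_nonempty(model):
    # Hypothetical consumer: runs twice, once per entry in `params`.
    assert model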
@@ -35,10 +32,6 @@ def server(model: str, num_postprocess_workers: int):
    args = ["--num_postprocess_workers", f"{num_postprocess_workers}"]
    if model.startswith("Qwen3"):
        args.extend(["--reasoning_parser", "qwen3"])
    elif model.startswith("DeepSeek-R1"):
        args.extend(["--reasoning_parser", "deepseek-r1"])

    if not model.startswith("gpt_oss"):
        args.extend(["--tool_parser", "qwen3"])

    with RemoteOpenAIServer(model_path, args) as remote_server:
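Not part of the diff: a hedged sketch of the client side of this setup, assuming an openai SDK version that ships the Responses API. In the real tests RemoteOpenAIServer (the repo's own helper) spawns the OpenAI-compatible server and provides a client pointed at it; the endpoint URL, API key, and model name below are placeholders.

import asyncio

import openai


async def main() -> None:
    # Placeholder endpoint; RemoteOpenAIServer normally picks a free port and
    # hands the tests a pre-configured client instead of a hard-coded URL.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="dummy")
    resp = await client.responses.create(model="Qwen3/Qwen3-0.6B",
                                         input="Say hello in one word.")
    # `output_text` concatenates the text items of the response output.
    print(resp.output_text)


asyncio.run(main())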
@@ -167,8 +160,8 @@ def get_current_weather(location: str, format: str = "celsius") -> dict:

@pytest.mark.asyncio(loop_scope="module")
async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
    if model.startswith("DeepSeek-R1"):
        pytest.skip("DeepSeek-R1 does not support tool calls")
    if model.startswith("Qwen3"):
        pytest.skip("Qwen3 tool call is not stable")

    tool_get_current_weather = {
        "type": "function",
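Not part of the diff: a hedged sketch of a get_current_weather function tool and a tool-enabled request, written against the standard OpenAI Chat Completions tool format rather than this file's exact dict. The schema fields, endpoint, and model name are illustrative placeholders, not the fixtures used above.

import asyncio
import json

import openai

# Standard OpenAI-style function tool schema (placeholder parameters).
TOOL_GET_CURRENT_WEATHER = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather for a location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
                "format": {"type": "string",
                           "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    },
}


async def main() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",  # placeholder
                                api_key="dummy")
    resp = await client.chat.completions.create(
        model="gpt_oss/gpt-oss-20b",  # placeholder model name
        messages=[{"role": "user", "content": "What is the weather in Paris?"}],
        tools=[TOOL_GET_CURRENT_WEATHER],
    )
    # Each tool call carries the function name and JSON-encoded arguments.
    for call in resp.choices[0].message.tool_calls or []:
        print(call.function.name, json.loads(call.function.arguments))


asyncio.run(main())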
@@ -241,8 +234,8 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str):

@pytest.mark.asyncio(loop_scope="module")
async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
    if model.startswith("DeepSeek-R1"):
        pytest.skip("DeepSeek-R1 does not support tool calls")
    if model.startswith("Qwen3"):
        pytest.skip("Qwen3 tool call is not stable")

    tool_get_current_weather = {
        "type": "function",
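Not part of the diff: a hedged sketch of consuming a streamed completion asynchronously, the behavior the streaming tests above exercise. It uses the standard openai Chat Completions streaming interface; the endpoint and model name are placeholders.

import asyncio

import openai


async def main() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",  # placeholder
                                api_key="dummy")
    stream = await client.chat.completions.create(
        model="Qwen3/Qwen3-0.6B",  # placeholder model name
        messages=[{"role": "user", "content": "Count to five."}],
        stream=True,
    )
    # Each chunk carries an incremental delta; concatenating the text deltas
    # reproduces the full non-streaming answer.
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()


asyncio.run(main())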