diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 702fc0d11d..4792036019 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -227,7 +227,6 @@ examples/test_ray.py::test_ray_disaggregated_serving[tp2] SKIP (https://nvbugs/5 unittest/executor/test_rpc_proxy.py SKIP (https://nvbugs/5605741) unittest/executor/test_rpc_worker.py SKIP (https://nvbugs/5605741) cpp/test_e2e.py::test_model[-redrafter-86] SKIP (https://nvbugs/5761642) -test_e2e.py::test_openai_responses SKIP (https://nvbugs/5804146) triton_server/test_triton.py::test_gpt_gather_logits[gpt-gather-logits] SKIP (https://nvbugs/5766960) full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5596337) test_e2e.py::test_eagle3_output_consistency_4gpus[Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf-Qwen3/qwen3-235B-eagle3] SKIP (https://nvbugs/5685010) diff --git a/tests/unittest/llmapi/apps/_test_openai_responses.py b/tests/unittest/llmapi/apps/_test_openai_responses.py index 08b53490b1..67ae21cd91 100644 --- a/tests/unittest/llmapi/apps/_test_openai_responses.py +++ b/tests/unittest/llmapi/apps/_test_openai_responses.py @@ -13,10 +13,7 @@ pytestmark = pytest.mark.threadleak(enabled=False) @pytest.fixture(scope="module", - params=[ - "gpt_oss/gpt-oss-20b", "DeepSeek-R1-Distill-Qwen-1.5B", - "Qwen3/Qwen3-0.6B" - ]) + params=["gpt_oss/gpt-oss-20b", "Qwen3/Qwen3-0.6B"]) def model(request): return request.param @@ -35,10 +32,6 @@ def server(model: str, num_postprocess_workers: int): args = ["--num_postprocess_workers", f"{num_postprocess_workers}"] if model.startswith("Qwen3"): args.extend(["--reasoning_parser", "qwen3"]) - elif model.startswith("DeepSeek-R1"): - args.extend(["--reasoning_parser", "deepseek-r1"]) - - if not model.startswith("gpt_oss"): args.extend(["--tool_parser", "qwen3"]) with RemoteOpenAIServer(model_path, args) as remote_server: @@ -167,8 +160,8 @@ def get_current_weather(location: str, format: str = "celsius") -> dict: @pytest.mark.asyncio(loop_scope="module") async def test_tool_calls(client: openai.AsyncOpenAI, model: str): - if model.startswith("DeepSeek-R1"): - pytest.skip("DeepSeek-R1 does not support tool calls") + if model.startswith("Qwen3"): + pytest.skip("Qwen3 tool call is not stable") tool_get_current_weather = { "type": "function", @@ -241,8 +234,8 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str): @pytest.mark.asyncio(loop_scope="module") async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str): - if model.startswith("DeepSeek-R1"): - pytest.skip("DeepSeek-R1 does not support tool calls") + if model.startswith("Qwen3"): + pytest.skip("Qwen3 tool call is not stable") tool_get_current_weather = { "type": "function",