Fix tests to expect RequestError for max_num_tokens validation in PyTorch backend

Signed-off-by: tzulingk@nvidia.com <tzulingk@nvidia.com>
author tzulingk@nvidia.com 2025-12-14 18:02:11 -08:00
committed by Pengyun Lin
parent 92411c4348
commit febc86ced0


@@ -2393,7 +2393,8 @@ def test_llm_chunked_prefill():
               enable_chunked_prefill=False,
               fast_build=True)
-    with pytest.raises(ValueError):
+    # max_num_tokens validation now raises RequestError consistently
+    with pytest.raises(RequestError):
         output = llm.generate_async(
             "A " * build_config.max_num_tokens,
             sampling_params=sampling_params,
@@ -2436,13 +2437,9 @@ def _test_llm_capture_request_error(pytorch_backend: bool, tp_size: int = 1):
     )
     prompt = 'A ' * 65  # the minimum max_num_tokens is 64
-    if pytorch_backend:
-        # pytorch backend will raise ValueError for max_num_tokens
-        with pytest.raises(ValueError):
-            llm.generate(prompt)
-    else:
-        with pytest.raises(RequestError):
-            llm.generate(prompt)
+    # Both backends now consistently raise RequestError for max_num_tokens validation
+    with pytest.raises(RequestError):
+        llm.generate(prompt)
 def test_llm_capture_request_error():
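
For readers following the change outside the diff, here is a minimal sketch of the behavior both hunks now assert: a prompt longer than max_num_tokens surfaces as RequestError on the PyTorch and TensorRT backends alike, whether submitted through generate or generate_async(...).result(). The import paths (tensorrt_llm.LLM, tensorrt_llm.SamplingParams, tensorrt_llm.executor.RequestError), the max_num_tokens constructor argument, and the helper name check_overlong_prompt_raises are assumptions for illustration, not taken from this diff.

import pytest

# Assumed import locations; the diff itself only shows the test bodies.
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.executor import RequestError


def check_overlong_prompt_raises(model_path: str) -> None:
    # Hypothetical minimal config; 64 is the smallest allowed max_num_tokens,
    # matching the comment in the test above.
    llm = LLM(model=model_path, max_num_tokens=64)
    prompt = "A " * 65  # one token past the limit
    sampling_params = SamplingParams(max_tokens=8)

    # Synchronous path, as exercised by _test_llm_capture_request_error.
    with pytest.raises(RequestError):
        llm.generate(prompt, sampling_params=sampling_params)

    # Async path, as exercised by test_llm_chunked_prefill; the error surfaces
    # by the time the result is collected.
    with pytest.raises(RequestError):
        llm.generate_async(prompt, sampling_params=sampling_params).result()

With the if/else removed from _test_llm_capture_request_error, the pytorch_backend flag now only affects how the LLM is constructed, not which exception the test expects.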