mirror of https://github.com/NVIDIA/TensorRT-LLM.git
[https://nvbugs/5601203] [fix] Restrict fp8 blockscale moe case (#8583)
Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
Signed-off-by: Mike Iovine <miovine@nvidia.com>
parent 8846dac9b4
commit 05aabfbc1e
@@ -2,7 +2,7 @@ import os
 import time
 
 import requests
-from defs.conftest import llm_models_root, skip_post_blackwell, skip_pre_hopper
+from defs.conftest import llm_models_root, skip_no_hopper
 from defs.trt_test_alternative import popen, print_error, print_info
 from openai import OpenAI
 from requests.exceptions import RequestException
@@ -92,10 +92,11 @@ def check_openai_chat_completion(http_port="8000",
         raise
 
 
-@skip_pre_hopper
-@skip_post_blackwell
+@skip_no_hopper
 def test_extra_llm_api_options(serve_test_root):
     test_configs_root = f"{serve_test_root}/test_configs"
 
+    # moe backend = CUTLASS which only supports fp8 blockscale on Hopper
     config_file = f"{test_configs_root}/Qwen3-30B-A3B-FP8.yml"
     model_path = f"{llm_models_root()}/Qwen3/Qwen3-30B-A3B-FP8"
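For context: skip_no_hopper comes from defs.conftest, whose implementation is not part of this diff. The sketch below is a hypothetical reconstruction of such a marker, not the actual TensorRT-LLM helper; it assumes pytest and torch are available and that Hopper is identified by CUDA compute-capability major version 9.

# Hypothetical sketch of a Hopper-only skip marker, in the spirit of
# defs.conftest.skip_no_hopper (not the actual TensorRT-LLM code).
import pytest
import torch


def _sm_major() -> int:
    """Compute-capability major version of GPU 0, or -1 if no GPU is visible."""
    if not torch.cuda.is_available():
        return -1
    major, _minor = torch.cuda.get_device_capability(0)
    return major


# Hopper (H100/H200) reports compute capability 9.x; skip on anything else.
skip_no_hopper = pytest.mark.skipif(
    _sm_major() != 9,
    reason="FP8 blockscale MoE with the CUTLASS backend requires Hopper (SM90)")

The replaced pair @skip_pre_hopper + @skip_post_blackwell still admitted architectures newer than Hopper; since the CUTLASS fp8 blockscale MoE path only supports Hopper (per the added comment), the single marker narrows the test to exactly that architecture. The referenced Qwen3-30B-A3B-FP8.yml presumably selects that MoE backend, which is what imposes the restriction.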