From 05aabfbc1e8b961c6c913731a173f3cf14be55d5 Mon Sep 17 00:00:00 2001
From: xiweny <13230610+VALLIS-NERIA@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:47:32 +0800
Subject: [PATCH] [https://nvbugs/5601203] [fix] Restrict fp8 blockscale moe case (#8583)

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
Signed-off-by: Mike Iovine
---
 tests/integration/defs/examples/serve/test_serve.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/integration/defs/examples/serve/test_serve.py b/tests/integration/defs/examples/serve/test_serve.py
index 1c2a5d68d3..c861d525a2 100755
--- a/tests/integration/defs/examples/serve/test_serve.py
+++ b/tests/integration/defs/examples/serve/test_serve.py
@@ -2,7 +2,7 @@ import os
 import time
 
 import requests
-from defs.conftest import llm_models_root, skip_post_blackwell, skip_pre_hopper
+from defs.conftest import llm_models_root, skip_no_hopper
 from defs.trt_test_alternative import popen, print_error, print_info
 from openai import OpenAI
 from requests.exceptions import RequestException
@@ -92,10 +92,11 @@ def check_openai_chat_completion(http_port="8000",
         raise
 
 
-@skip_pre_hopper
-@skip_post_blackwell
+@skip_no_hopper
 def test_extra_llm_api_options(serve_test_root):
     test_configs_root = f"{serve_test_root}/test_configs"
+
+    # moe backend = CUTLASS which only supports fp8 blockscale on Hopper
     config_file = f"{test_configs_root}/Qwen3-30B-A3B-FP8.yml"
     model_path = f"{llm_models_root()}/Qwen3/Qwen3-30B-A3B-FP8"
 
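
Note: the patch swaps the `skip_pre_hopper` + `skip_post_blackwell` decorator pair
for a single `skip_no_hopper`, restricting the test to Hopper only, since the
CUTLASS moe backend supports fp8 blockscale only on that architecture. Below is a
minimal sketch of how such a pytest marker can be built; it is an assumption for
illustration only — the compute-capability check via torch and the helper's body
are not taken from defs/conftest.py, whose actual implementation may differ.

    import pytest
    import torch

    def _is_hopper() -> bool:
        # Hypothetical check: Hopper GPUs report CUDA compute
        # capability 9.x (SM90); anything else is skipped.
        if not torch.cuda.is_available():
            return False
        major, _minor = torch.cuda.get_device_capability(0)
        return major == 9

    # Skip the decorated test unless it runs on a Hopper (SM90) device.
    skip_no_hopper = pytest.mark.skipif(
        not _is_hopper(),
        reason="fp8 blockscale moe with the CUTLASS backend requires Hopper")

With a helper of this shape exported from defs/conftest.py, applying
`@skip_no_hopper` to test_extra_llm_api_options keeps the fp8 blockscale case off
both pre-Hopper and newer-than-Hopper devices, which is the intent of this change.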