[https://nvbugs/5601203] [fix] Restrict fp8 blockscale moe case (#8583)

Signed-off-by: Xiwen Yu <13230610+VALLIS-NERIA@users.noreply.github.com>
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
Signed-off-by: Mike Iovine <miovine@nvidia.com>
This commit is contained in:
xiweny 2025-10-29 10:47:32 +08:00 committed by Mike Iovine
parent 8846dac9b4
commit 05aabfbc1e

View File

@ -2,7 +2,7 @@ import os
import time
import requests
from defs.conftest import llm_models_root, skip_post_blackwell, skip_pre_hopper
from defs.conftest import llm_models_root, skip_no_hopper
from defs.trt_test_alternative import popen, print_error, print_info
from openai import OpenAI
from requests.exceptions import RequestException
@ -92,10 +92,11 @@ def check_openai_chat_completion(http_port="8000",
raise
@skip_pre_hopper
@skip_post_blackwell
@skip_no_hopper
def test_extra_llm_api_options(serve_test_root):
test_configs_root = f"{serve_test_root}/test_configs"
# moe backend = CUTLASS which only supports fp8 blockscale on Hopper
config_file = f"{test_configs_root}/Qwen3-30B-A3B-FP8.yml"
model_path = f"{llm_models_root()}/Qwen3/Qwen3-30B-A3B-FP8"