Restore the Qwen3-30B-A3B-NVFP4 model path to the original one, because the model server is not synced

Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
This commit is contained in:
Jenny Liu 2026-01-12 06:24:43 +00:00
parent 3a1c58987f
commit 747ae7d40b
6 changed files with 13 additions and 12 deletions

View File

@ -3588,13 +3588,14 @@ class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
torch_compile_config=torch_compile_config,
)
with LLM(f"{llm_models_root()}/Qwen3/nvidia-Qwen3-30B-A3B-NVFP4",
tensor_parallel_size=tp_size,
pipeline_parallel_size=pp_size,
moe_expert_parallel_size=ep_size,
**pytorch_config,
enable_attention_dp=attention_dp,
max_batch_size=32) as llm:
with LLM(
f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
tensor_parallel_size=tp_size,
pipeline_parallel_size=pp_size,
moe_expert_parallel_size=ep_size,
**pytorch_config,
enable_attention_dp=attention_dp,
max_batch_size=32) as llm:
task = MMLU(self.MODEL_NAME)
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)

View File

@ -120,7 +120,7 @@ MODEL_PATH_DICT = {
"qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8",
"qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4",
"qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B",
"qwen3_30b_a3b_fp4": "Qwen3/nvidia-Qwen3-30B-A3B-NVFP4",
"qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
"qwen3_32b": "Qwen3/Qwen3-32B",
"qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4",
"qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",

View File

@ -1877,7 +1877,7 @@ def test_ptp_quickstart(llm_root, llm_venv):
marks=(skip_pre_hopper, pytest.mark.skip_less_device_memory(40000))),
pytest.param(
'Qwen3-30B-A3B_nvfp4_hf',
'Qwen3/nvidia-Qwen3-30B-A3B-NVFP4',
'Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf',
marks=(skip_pre_blackwell, pytest.mark.skip_less_device_memory(20000))),
pytest.param(
'Llama3.3-70B-FP8',

View File

@ -26,7 +26,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]

View File

@ -12,7 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]

View File

@ -32,7 +32,7 @@ l0_rtx_pro_6000:
- test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8]
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] # 3mins
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_fp8_hf-Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf] # 3mins
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4] # 2mins
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] # 2mins
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
- test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio]