diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 050f890bd2..1a32e333b5 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -3588,13 +3588,14 @@ class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness): torch_compile_config=torch_compile_config, ) - with LLM(f"{llm_models_root()}/Qwen3/nvidia-Qwen3-30B-A3B-NVFP4", - tensor_parallel_size=tp_size, - pipeline_parallel_size=pp_size, - moe_expert_parallel_size=ep_size, - **pytorch_config, - enable_attention_dp=attention_dp, - max_batch_size=32) as llm: + with LLM( + f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf", + tensor_parallel_size=tp_size, + pipeline_parallel_size=pp_size, + moe_expert_parallel_size=ep_size, + **pytorch_config, + enable_attention_dp=attention_dp, + max_batch_size=32) as llm: task = MMLU(self.MODEL_NAME) task.evaluate(llm) task = GSM8K(self.MODEL_NAME) diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index 05a79824eb..dad536d49e 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -120,7 +120,7 @@ MODEL_PATH_DICT = { "qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8", "qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4", "qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B", - "qwen3_30b_a3b_fp4": "Qwen3/nvidia-Qwen3-30B-A3B-NVFP4", + "qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf", "qwen3_32b": "Qwen3/Qwen3-32B", "qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4", "qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf", diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 7e5aca58aa..a4b060eb71 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1877,7 +1877,7 @@ def test_ptp_quickstart(llm_root, llm_venv): marks=(skip_pre_hopper, pytest.mark.skip_less_device_memory(40000))), pytest.param( 'Qwen3-30B-A3B_nvfp4_hf', - 'Qwen3/nvidia-Qwen3-30B-A3B-NVFP4', + 'Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf', marks=(skip_pre_blackwell, pytest.mark.skip_less_device_memory(20000))), pytest.param( 'Llama3.3-70B-FP8', diff --git a/tests/integration/test_lists/qa/llm_digits_core.txt b/tests/integration/test_lists/qa/llm_digits_core.txt index 56eadcf348..8bc261a5de 100644 --- a/tests/integration/test_lists/qa/llm_digits_core.txt +++ b/tests/integration/test_lists/qa/llm_digits_core.txt @@ -26,7 +26,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] -test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8] test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4] diff --git a/tests/integration/test_lists/qa/llm_digits_func.txt b/tests/integration/test_lists/qa/llm_digits_func.txt index f7d1686e8f..aba46316e2 100644 --- a/tests/integration/test_lists/qa/llm_digits_func.txt +++ b/tests/integration/test_lists/qa/llm_digits_func.txt @@ -12,7 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] -test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8] test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4] test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus] diff --git a/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml b/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml index 45d106f419..5072fdb320 100644 --- a/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml +++ b/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml @@ -32,7 +32,7 @@ l0_rtx_pro_6000: - test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8] - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] # 3mins - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_fp8_hf-Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf] # 3mins - - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4] # 2mins + - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] # 2mins - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b] - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b] - test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio]