mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
restore the model_path Qwen3-30B-A3B-NVFP4 to the original one - model server not synced
Signed-off-by: Jenny Liu <JennyLiu-nv+JennyLiu@users.noreply.github.com>
This commit is contained in:
parent
3a1c58987f
commit
747ae7d40b
@ -3588,13 +3588,14 @@ class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
|
||||
torch_compile_config=torch_compile_config,
|
||||
)
|
||||
|
||||
with LLM(f"{llm_models_root()}/Qwen3/nvidia-Qwen3-30B-A3B-NVFP4",
|
||||
tensor_parallel_size=tp_size,
|
||||
pipeline_parallel_size=pp_size,
|
||||
moe_expert_parallel_size=ep_size,
|
||||
**pytorch_config,
|
||||
enable_attention_dp=attention_dp,
|
||||
max_batch_size=32) as llm:
|
||||
with LLM(
|
||||
f"{llm_models_root()}/Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
|
||||
tensor_parallel_size=tp_size,
|
||||
pipeline_parallel_size=pp_size,
|
||||
moe_expert_parallel_size=ep_size,
|
||||
**pytorch_config,
|
||||
enable_attention_dp=attention_dp,
|
||||
max_batch_size=32) as llm:
|
||||
task = MMLU(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
task = GSM8K(self.MODEL_NAME)
|
||||
|
||||
@ -120,7 +120,7 @@ MODEL_PATH_DICT = {
|
||||
"qwen3_14b_fp8": "Qwen3/nvidia-Qwen3-14B-FP8",
|
||||
"qwen3_14b_fp4": "Qwen3/nvidia-Qwen3-14B-NVFP4",
|
||||
"qwen3_30b_a3b": "Qwen3/Qwen3-30B-A3B",
|
||||
"qwen3_30b_a3b_fp4": "Qwen3/nvidia-Qwen3-30B-A3B-NVFP4",
|
||||
"qwen3_30b_a3b_fp4": "Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf",
|
||||
"qwen3_32b": "Qwen3/Qwen3-32B",
|
||||
"qwen3_32b_fp4": "Qwen3/nvidia-Qwen3-32B-NVFP4",
|
||||
"qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
|
||||
|
||||
@ -1877,7 +1877,7 @@ def test_ptp_quickstart(llm_root, llm_venv):
|
||||
marks=(skip_pre_hopper, pytest.mark.skip_less_device_memory(40000))),
|
||||
pytest.param(
|
||||
'Qwen3-30B-A3B_nvfp4_hf',
|
||||
'Qwen3/nvidia-Qwen3-30B-A3B-NVFP4',
|
||||
'Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf',
|
||||
marks=(skip_pre_blackwell, pytest.mark.skip_less_device_memory(20000))),
|
||||
pytest.param(
|
||||
'Llama3.3-70B-FP8',
|
||||
|
||||
@ -26,7 +26,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
|
||||
|
||||
|
||||
@ -12,7 +12,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
|
||||
test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]
|
||||
|
||||
@ -32,7 +32,7 @@ l0_rtx_pro_6000:
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8]
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] # 3mins
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_fp8_hf-Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf] # 3mins
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/nvidia-Qwen3-30B-A3B-NVFP4] # 2mins
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] # 2mins
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
|
||||
- test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
|
||||
- test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4-image_audio]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user