diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py index dad536d49e..255b183e3c 100644 --- a/tests/integration/defs/perf/test_perf.py +++ b/tests/integration/defs/perf/test_perf.py @@ -169,7 +169,7 @@ MODEL_PATH_DICT = { "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503", "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b", "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b", - "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2", + "nemotron_nano_12b_v2": "NVIDIA-Nemotron-Nano-12B-v2", "nvidia_nemotron_nano_9b_v2_nvfp4": "NVIDIA-Nemotron-Nano-9B-v2-NVFP4", "starcoder2_7b": "starcoder2-7b", } diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index a4b060eb71..3a63c4c191 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1907,14 +1907,13 @@ def test_ptp_quickstart(llm_root, llm_venv): 'modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4', marks=skip_pre_blackwell), pytest.param( - 'Qwen3-8b-fp8', 'Qwen3/nvidia-Qwen3-8B-FP8', marks=skip_pre_blackwell), + 'Qwen3-8b-fp8', 'Qwen3/nvidia-Qwen3-8B-FP8', marks=skip_pre_hopper), pytest.param('Qwen3-8b-nvfp4', 'Qwen3/nvidia-Qwen3-8B-NVFP4', marks=skip_pre_blackwell), ("Qwen3-8B-bf16", "Qwen3/Qwen3-8B"), - pytest.param('Qwen3-14b-fp8', - 'Qwen3/nvidia-Qwen3-14B-FP8', - marks=skip_pre_blackwell), + pytest.param( + 'Qwen3-14b-fp8', 'Qwen3/nvidia-Qwen3-14B-FP8', marks=skip_pre_hopper), pytest.param('Qwen3-14b-nvfp4', 'Qwen3/nvidia-Qwen3-14B-NVFP4', marks=skip_pre_blackwell), @@ -1925,25 +1924,25 @@ def test_ptp_quickstart(llm_root, llm_venv): ("Qwen3-32B-bf16", "Qwen3/Qwen3-32B"), pytest.param('Phi4-Reasoning-Plus-fp8', 'nvidia-Phi-4-reasoning-plus-FP8', - marks=skip_pre_blackwell), + marks=skip_pre_hopper), pytest.param('Phi4-Reasoning-Plus-nvfp4', 'nvidia-Phi-4-reasoning-plus-NVFP4', marks=skip_pre_blackwell), ("Phi-4-reasoning-plus-bf16", "Phi-4-reasoning-plus"), pytest.param('Nemotron-Super-49B-v1.5-FP8', 'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8', - marks=skip_pre_blackwell), + marks=skip_pre_hopper), pytest.param('Llama-4-Scout-17B-16E-FP4', 'llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4', marks=skip_pre_blackwell), - pytest.param('Nemotron-Nano-v2-nvfp4', + pytest.param('Nemotron-Nano-9B-v2-nvfp4', 'NVIDIA-Nemotron-Nano-9B-v2-NVFP4', marks=skip_pre_blackwell), ]) def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path): print(f"Testing {model_name}.") example_root = Path(os.path.join(llm_root, "examples", "llm-api")) - if model_name in ("Nemotron-H-8B", "Nemotron-Nano-v2-nvfp4"): + if model_name in ("Nemotron-H-8B", "Nemotron-Nano-9B-v2-nvfp4"): llm_venv.run_cmd([ str(example_root / "quickstart_advanced.py"), "--disable_kv_cache_reuse", diff --git a/tests/integration/test_lists/qa/llm_digits_core.txt b/tests/integration/test_lists/qa/llm_digits_core.txt index 8bc261a5de..2da9bbb00d 100644 --- a/tests/integration/test_lists/qa/llm_digits_core.txt +++ b/tests/integration/test_lists/qa/llm_digits_core.txt @@ -23,7 +23,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4 test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4] -test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] diff --git a/tests/integration/test_lists/qa/llm_digits_func.txt b/tests/integration/test_lists/qa/llm_digits_func.txt index aba46316e2..05a2e5e1b7 100644 --- a/tests/integration/test_lists/qa/llm_digits_func.txt +++ b/tests/integration/test_lists/qa/llm_digits_func.txt @@ -22,7 +22,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1.5-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1_5-FP8] test_e2e.py::test_ptp_quickstart_advanced[Llama-4-Scout-17B-16E-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4] test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B] -test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] +test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-audio] test_e2e.py::test_ptp_quickstart_multimodal_phi4mm[phi4-multimodal-instruct-multimodals/Phi-4-multimodal-instruct-image_audio] diff --git a/tests/integration/test_lists/qa/llm_perf_sanity.yml b/tests/integration/test_lists/qa/llm_perf_sanity.yml index 771f4acd95..a499a85d27 100644 --- a/tests/integration/test_lists/qa/llm_perf_sanity.yml +++ b/tests/integration/test_lists/qa/llm_perf_sanity.yml @@ -38,7 +38,7 @@ llm_perf_sanity: # Ministral-8B - perf/test_perf.py::test_perf[ministral_8b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:500-con:250] - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-pytorch-bfloat16-input_output_len:500,2000] - - perf/test_perf.py::test_perf[nemotron_nano_9b_v2-bench-pytorch-bfloat16-input_output_len:512,512] + - perf/test_perf.py::test_perf[nemotron_nano_12b_v2-bench-pytorch-bfloat16-input_output_len:512,512] - perf/test_perf.py::test_perf[qwen3_4b_eagle3-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4] - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-maxbs:256-input_output_len:128,128-gpus:2] - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:2]