[TRTLLM-4618][feat] Add Nemotron Super 49B FP8 test on RTX6000 Pro (SM120) (#4363)

* Add Nemotron Super 49B FP8 test for the B40 release

Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>

* Add the new tests to the QA test list

Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>

* Apply pre-commit formatting changes

Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>

---------

Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>
This commit is contained in:
Faraz 2025-05-18 18:30:24 -07:00 committed by GitHub
parent 7de90a66bc
commit 791c209006
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 8 additions and 0 deletions

View File

@ -1269,6 +1269,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
pytest.param('Llama3.1-70B-FP8',
'llama-3.1-model/Llama-3.1-70B-Instruct-FP8',
marks=skip_pre_hopper),
pytest.param('Nemotron-Super-49B-v1-FP8',
'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8',
marks=skip_pre_hopper),
pytest.param('Mixtral-8x7B-NVFP4',
'nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1',
marks=skip_pre_blackwell),
@ -1524,6 +1527,8 @@ def test_ptp_quickstart_advanced_8gpus(llm_root, llm_venv, model_name,
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize("model_name,model_path", [
("Llama3.1-70B-BF16", "llama-3.1-model/Meta-Llama-3.1-70B"),
('Nemotron-Super-49B-v1-BF16',
'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1'),
("Mixtral-8x7B-BF16", "Mixtral-8x7B-Instruct-v0.1"),
])
def test_ptp_quickstart_advanced_2gpus_sm120(llm_root, llm_venv, model_name,

View File

@ -24,6 +24,8 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B]
test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B]
test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]

View File

@ -25,4 +25,5 @@ l0_rtx_pro_6000:
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B]
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
- test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
- test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]