mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-18 16:55:08 +08:00
[TRTLLM-4618][feat] Add Nemotron Super 49B FP8 test on RTX6000 Pro (SM120) (#4363)
* added nemotron 49b fp8 for B40 release Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com> * add tests to QA list Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com> * pre-commit changes Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com> --------- Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>
This commit is contained in:
parent
7de90a66bc
commit
791c209006
@@ -1269,6 +1269,9 @@ def test_ptp_quickstart(llm_root, llm_venv):
     pytest.param('Llama3.1-70B-FP8',
                  'llama-3.1-model/Llama-3.1-70B-Instruct-FP8',
                  marks=skip_pre_hopper),
+    pytest.param('Nemotron-Super-49B-v1-FP8',
+                 'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8',
+                 marks=skip_pre_hopper),
     pytest.param('Mixtral-8x7B-NVFP4',
                  'nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1',
                  marks=skip_pre_blackwell),
@@ -1524,6 +1527,8 @@ def test_ptp_quickstart_advanced_8gpus(llm_root, llm_venv, model_name,
 @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("model_name,model_path", [
     ("Llama3.1-70B-BF16", "llama-3.1-model/Meta-Llama-3.1-70B"),
+    ('Nemotron-Super-49B-v1-BF16',
+     'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1'),
     ("Mixtral-8x7B-BF16", "Mixtral-8x7B-Instruct-v0.1"),
 ])
 def test_ptp_quickstart_advanced_2gpus_sm120(llm_root, llm_venv, model_name,
||||
@@ -24,6 +24,8 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B]
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
+test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
 test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
 test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B]
+test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1]
 test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]
||||
@@ -25,4 +25,5 @@ l0_rtx_pro_6000:
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
+  - test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
   - test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
||||
Loading…
Reference in New Issue
Block a user