From 9d3e05486bd0c6c88824863251a86cd5fbee17ec Mon Sep 17 00:00:00 2001
From: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>
Date: Thu, 15 May 2025 17:57:31 +0800
Subject: [PATCH] test: add qa test list for rtx5090 and rtx_pro_6000 (#4254)

* add test list for rtx5090 and rtx_pro_6000

Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>

* add 2gpu llama70b test cases

Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>

* remove duplicate and invalid test cases

Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>

* add 2gpus test cases

Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>

---------

Signed-off-by: Stanley Sun <190317771+StanleySun639@users.noreply.github.com>
---
 .../test_lists/qa/llm_release_gb20x.txt       | 28 ++++++++++++++++++
 .../qa/llm_release_rtx_pro_6000.txt           | 29 +++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 tests/integration/test_lists/qa/llm_release_gb20x.txt
 create mode 100644 tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt

diff --git a/tests/integration/test_lists/qa/llm_release_gb20x.txt b/tests/integration/test_lists/qa/llm_release_gb20x.txt
new file mode 100644
index 0000000000..0a7eaa171c
--- /dev/null
+++ b/tests/integration/test_lists/qa/llm_release_gb20x.txt
@@ -0,0 +1,28 @@
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.2-11B-BF16-llama-3.2-models/Llama-3.2-11B-Vision]
+test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image]
+test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-image]
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video]
+examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-full_prec]
+examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-int4_awq]
+examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-fp8]
+examples/test_nemotron.py::test_llm_nemotron_4_15b_1gpu[bfloat16-full_prec]
+examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-1b]
+examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-3.2-3b]
+examples/test_llama.py::test_mistral_nemo_fp8_with_bf16_lora[Mistral-Nemo-12b-Base]
+examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]
+examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]
+examples/test_mistral.py::test_llm_mistral_lora_1gpu[komt-mistral-7b-v1-lora-komt-mistral-7b-v1]
+examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-summarization_long]
+examples/test_mistral.py::test_mistral_nemo_minitron_fp8_with_bf16_lora[Mistral-NeMo-Minitron-8B-Instruct]
+examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ]
+examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]
+examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]
+examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]
+accuracy/test_cli_flow.py::TestStarcoder2_3B::test_auto_dtype
+accuracy/test_cli_flow.py::TestStarcoder2_15B::test_smooth_quant_ootb
diff --git a/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt b/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt
new file mode 100644
index 0000000000..f0377701fc
--- /dev/null
+++ b/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt
@@ -0,0 +1,29 @@
+accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp8_tp2pp2
+accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp8_tp2pp2_manage_weights
+accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=True-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=True-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=False-cuda_graph=False-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
+test_e2e.py::test_ptp_quickstart_advanced_mixed_precision
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-70B]
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8]
+test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
+test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B]
+test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1]