diff --git a/tests/integration/defs/.test_durations b/tests/integration/defs/.test_durations
index 5d9d1d41a5..12162c8720 100755
--- a/tests/integration/defs/.test_durations
+++ b/tests/integration/defs/.test_durations
@@ -33,12 +33,10 @@
     "test_unittests.py::test_unittests_v2[unittest/trt/model/test_gpt_e2e.py]": 618.984766190988,
     "test_unittests.py::test_unittests_v2[unittest/trt/model_api/test_model_api_multi_gpu.py]": 26.751821771264076,
     "test_accuracy.py::test_accuracy_gpt[gpt-weight-streaming-ootb]": 152.701959496364,
-    "test_e2e.py::test_llmapi_load_engine_from_build_command[falcon-falcon-7b-instruct]": 250.63044386729598,
     "test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition2\"]": 92.527716698125,
     "test_unittests.py::test_unittests_v2[unittest/trt/model/test_llama.py]": 2248.572680544108,
     "disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]": 89.62453198479488,
     "test_accuracy.py::test_accuracy_gpt[gpt-paged-kv-cache]": 111.11273829906713,
-    "test_e2e.py::test_llmapi_load_engine_from_build_command[gptj-gpt-j-6b]": 229.89251923607662,
     "test_e2e.py::test_llmapi_quickstart_atexit": 95.69416327599902,
     "test_unittests.py::test_unittests_v2[unittest/trt/attention/test_gpt_attention.py -k \"partition3\"]": 90.72128840722144,
     "test_unittests.py::test_unittests_v2[unittest/trt/functional]": 554.7785050286911,
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
index 98a9ac3f2a..1e00e1f1f0 100644
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -439,8 +439,6 @@ test_e2e.py::test_falcon_e2e[gpu_percent_0_8-use_cpp_session-mqa]
 test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding]
 test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding]
 test_e2e.py::test_llama_e2e[use_py_session]
-test_e2e.py::test_llmapi_load_engine_from_build_command[falcon-falcon-7b-instruct] # 5min
-test_e2e.py::test_llmapi_load_engine_from_build_command[gptj-gpt-j-6b] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf] # 5min
 test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
diff --git a/tests/integration/test_lists/qa/llm_sanity_test.txt b/tests/integration/test_lists/qa/llm_sanity_test.txt
index 4d0c64cc11..f20b0f7f76 100644
--- a/tests/integration/test_lists/qa/llm_sanity_test.txt
+++ b/tests/integration/test_lists/qa/llm_sanity_test.txt
@@ -92,8 +92,6 @@ examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-
 test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding]
 test_e2e.py::test_llama_e2e[use_py_session-remove_input_padding]
 test_e2e.py::test_llama_e2e[use_py_session]
-test_e2e.py::test_llmapi_load_engine_from_build_command[falcon-falcon-7b-instruct] # 5min
-test_e2e.py::test_llmapi_load_engine_from_build_command[gptj-gpt-j-6b] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-codellama/CodeLlama-7b-Instruct-hf] # 5min
 test_e2e.py::test_llmapi_load_engine_from_build_command[llama-llama-models/llama-7b-hf] # 5min
 test_e2e.py::test_mistral_e2e[use_cpp_session-remove_input_padding]
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 6a2c3c3d21..0e2d1f700c 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -341,7 +341,6 @@ full:B200/examples/test_llama.py::test_llm_llama_lookahead_xqa_fp8_1gpu[llama-3.
 full:B200/examples/test_medusa.py::test_llm_medusa_1gpu[use_py_session-medusa-vicuna-7b-v1.3-4-heads-bfloat16-bs1] SKIP (No available XQA kernels are found for speculative decoding mode)
 full:B200/examples/accuracy/test_cli_flow.py::TestLlama3_1_8BInstruct::test_medusa_fp8_prequantized SKIP (No available XQA kernels are found for speculative decoding mode)
 full:B200/examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-nb:1] SKIP (Only Context FMHA supports custom mask input currently)
-full:B200/test_e2e.py::test_llmapi_load_engine_from_build_command[falcon-falcon-7b-instruct] SKIP (Not supported on B200)
 full:B200/examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen_7b_chat-enable_ptpc-nb:4] SKIP (Not supported on B200)
 full:B200/examples/test_gpt.py::test_llm_gpt2_starcoder_weight_only[starcoder2-int4-float16] SKIP (not support on B200)
 full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768)
@@ -444,3 +443,4 @@ examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-re
 examples/test_mistral.py::test_llm_mistral_nemo_fp8_quantization_1gpu[Mistral-Nemo-12b-Base-summarization] SKIP (https://nvbugspro.nvidia.com/bug/5181262)
 examples/test_qwen.py::test_llm_qwen_moe_single_gpu_summary[qwen1.5_moe_a2.7b_chat-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha] SKIP (https://nvbugs/5180961)
 disaggregated/test_disaggregated.py::test_disaggregated_overlap_dp[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5166600)
+disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5201168)