[Infra] - Minor clean-up and test Ubuntu mirrors (#4829)

Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
Signed-off-by: QI JUN <22017000+QiJune@users.noreply.github.com>
Co-authored-by: QI JUN <22017000+QiJune@users.noreply.github.com>
Author: Yanchao Lu <yanchaol@nvidia.com>
Date: 2025-06-02 20:18:20 +08:00 (committed by GitHub)
Parent: eb2d51a429
Commit: 8166649d03
7 changed files with 55 additions and 42 deletions

View File

@@ -314,7 +314,7 @@ def main(args):
theta=pretrain_config.rotary_base,
)
-if batch_size == 0:
+if batch_size == 0 or len(batch_input_ids) == 0:
    return [], [], [], {}
input_lengths = [x.size(0) for x in batch_input_ids]
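
The new guard also catches batches where batch_size is nonzero but batch_input_ids is empty, which would otherwise blow up on the first reduction over input_lengths. A minimal runnable sketch of the failure mode — the function name and the max() consumer are hypothetical stand-ins, not the repo's actual code:

import torch

def prepare_inputs(batch_size, batch_input_ids):
    # Early-out on an empty batch; checking batch_size alone misses the
    # case where the id list itself is empty.
    if batch_size == 0 or len(batch_input_ids) == 0:
        return [], [], [], {}
    input_lengths = [x.size(0) for x in batch_input_ids]
    # Without the len() guard an empty list would reach max() here and
    # raise "ValueError: max() arg is an empty sequence".
    return batch_input_ids, input_lengths, [max(input_lengths)], {}

print(prepare_inputs(4, []))  # ([], [], [], {}) instead of a ValueError
print(prepare_inputs(1, [torch.tensor([1, 2, 3])])[1])  # [3]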

View File

@@ -1006,6 +1006,12 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
]
}
+if (env.testPhase2StageName) {
+    parameters += [
+        'testPhase2StageName': env.testPhase2StageName,
+    ]
+}
echo "trigger SBSA test job, params: ${parameters}"
def status = triggerJob(
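
The added block forwards testPhase2StageName to the downstream job only when the environment variable is set, so the triggered job keeps its own default otherwise. The same optional-parameter pattern as a rough Python sketch (build_parameters is a hypothetical stand-in, not a Jenkins API):

import os

def build_parameters(parameters):
    # Copy so the caller's dict is not mutated.
    params = dict(parameters)
    # Mirror of `if (env.testPhase2StageName)`: forward the stage name
    # only when it is actually set.
    stage = os.environ.get("testPhase2StageName")
    if stage:
        params["testPhase2StageName"] = stage
    return params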

View File

@@ -1581,13 +1581,11 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
"H100_PCIe-TensorRT-[Post-Merge]-1": ["h100-cr", "l0_h100", 1, 2],
"H100_PCIe-TensorRT-[Post-Merge]-2": ["h100-cr", "l0_h100", 2, 2],
"B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
"DGX_H100-4_GPUs-TensorRT-[Post-Merge]-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
// "A100_80GB_PCIE-TensorRT-Perf-1": ["a100-80gb-pcie", "l0_perf", 1, 1],
"H100_PCIe-TensorRT-Perf-1": ["h100-cr", "l0_perf", 1, 1],
"H100_PCIe-PyTorch-Perf-1": ["h100-cr", "l0_perf", 1, 1],
"DGX_H200-8_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x8", "l0_dgx_h200", 1, 1, 8],
-"DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 2, 4],
-"DGX_H200-4_GPUs-PyTorch-[Post-Merge]-2": ["dgx-h200-x4", "l0_dgx_h200", 2, 2, 4],
+"DGX_H200-4_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 1, 4],
"DGX_H200-4_GPUs-TensorRT-[Post-Merge]-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 1, 4],
]
parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
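
Each config tuple above reads as [node label, test list, shard index, shard count, GPU count], with `values[4] ?: 1` defaulting the GPU count to 1; the change collapses the two DGX H200 4-GPU PyTorch shards (1 of 2 and 2 of 2) into a single stage (1 of 1). A sketch of how such shard pairs can partition a test list — the round-robin split is an assumption for illustration, not necessarily the repo's actual scheme:

def shard(tests, index, total):
    # 1-based shard index, matching pairs like (1, 2) and (2, 2) above.
    return [t for i, t in enumerate(tests) if i % total == index - 1]

tests = ["t1", "t2", "t3", "t4", "t5"]
print(shard(tests, 1, 2))  # ['t1', 't3', 't5']
print(shard(tests, 2, 2))  # ['t2', 't4']
print(shard(tests, 1, 1))  # the whole list, i.e. the merged stage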
@@ -1636,7 +1634,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
fullSet += SBSASlurmTestConfigs.keySet()
if (env.targetArch == AARCH64_TRIPLE) {
-parallelJobs = SBSASlurmTestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
+parallelJobs = SBSATestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "arm64"), {
runLLMTestlistOnPlatform(pipeline, values[0], values[1], LINUX_AARCH64_CONFIG, false, key, values[2], values[3])
}]]}
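
collectEntries maps each config entry to a [pod config, closure] pair keyed by stage name, which the pipeline later executes in parallel. A hypothetical Python analogue of that fan-out (the helper names are stand-ins for the Groovy functions):

def build_parallel_jobs(test_configs, make_pod_config, run_testlist):
    # Groovy: configs.collectEntries{ key, values -> [key, [pod, closure]] }
    return {
        key: (
            make_pod_config(values[0], "arm64"),
            # Default args freeze key/values per entry, like the Groovy
            # closure capturing its loop variables.
            lambda v=values, k=key: run_testlist(v[0], v[1], k, v[2], v[3]),
        )
        for key, values in test_configs.items()
    }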

View File

@@ -210,39 +210,3 @@ l0_dgx_h100:
  - examples/test_multimodal.py::test_llm_multimodal_general[Llama-3.2-11B-Vision-pp:1-tp:2-bfloat16-bs:1-cpp_e2e:False-nb:1]
  - examples/test_multimodal.py::test_llm_multimodal_general[llava-v1.6-mistral-7b-hf-vision-trtllm-pp:1-tp:2-float16-bs:1-cpp_e2e:False-nb:1]
  - deterministic/test_mixtral_deterministic.py::test_llm_mixtral_4gpus_deterministic[Mixtral-8x7B-Instruct-v0.1-float16]
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 4
-        lte: 4
-    wildcards:
-      gpu:
-      - '*h100*'
-      linux_distribution_name: ubuntu*
-  terms:
-    stage: post_merge
-    backend: tensorrt
-    auto_trigger: others
-  tests:
-  # ------------- TRT tests ---------------
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
-  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]
-  - examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b]
-  - examples/test_mamba.py::test_llm_mamba2_2gpu[mamba-codestral-7B-v0.1]
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant_ootb_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_prequantized_tp2
-  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_gptq_prequantized_tp2
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[disable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[enable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[disable_gemm_allreduce_plugin]
-  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-enable_fp8_context_fmha]
-  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-disable_fp8_context_fmha]
-  - accuracy/test_cli_flow.py::TestPhi2::test_tp2
-  - llmapi/test_llm_e2e.py::test_llmapi_quant_llama_70b
-  - llmapi/test_llm_examples.py::test_llmapi_example_distributed_autopp_tp2
-  - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8]
-  - examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2]
-  - unittest/llmapi/apps/_test_openai_multi_gpu.py -m "part0"
-  - test_e2e.py::test_trtllm_bench_mgmn
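
The removed block is a test-db condition: a machine qualifies when it satisfies every range and wildcard, and a run qualifies when it matches every term (here: exactly 4 H100 GPUs on Ubuntu, post-merge, TensorRT backend). The same test set reappears under l0_dgx_h200 in a later hunk. A hypothetical matcher, assuming the semantics the field names suggest:

from fnmatch import fnmatch

def condition_matches(cond, machine, run):
    # ranges: numeric bounds, e.g. system_gpu_count gte/lte.
    for key, bounds in cond.get("ranges", {}).items():
        value = machine[key]
        if value < bounds.get("gte", value) or value > bounds.get("lte", value):
            return False
    # wildcards: glob patterns; a list means "any pattern may match".
    for key, pattern in cond.get("wildcards", {}).items():
        patterns = pattern if isinstance(pattern, list) else [pattern]
        if not any(fnmatch(str(machine[key]), p) for p in patterns):
            return False
    # terms: exact attributes of the run (stage, backend, ...).
    return all(run.get(k) == v for k, v in cond.get("terms", {}).items())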

View File

@@ -9,6 +9,7 @@ l0_dgx_h200:
      gpu:
      - '*h200*'
      linux_distribution_name: ubuntu*
+      cpu: x86_64
  terms:
    stage: post_merge
    backend: pytorch
@@ -31,6 +32,7 @@ l0_dgx_h200:
      gpu:
      - '*h200*'
      linux_distribution_name: ubuntu*
+      cpu: x86_64
  terms:
    stage: post_merge
    backend: pytorch
@@ -89,3 +91,39 @@ l0_dgx_h200:
  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp2pp2-fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp2pp2-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True]
  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=False]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 4
+        lte: 4
+    wildcards:
+      gpu:
+      - '*h200*'
+      linux_distribution_name: ubuntu*
+      cpu: x86_64
+  terms:
+    stage: post_merge
+    backend: tensorrt
+  tests:
+  # ------------- TRT tests ---------------
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-RobertaForQuestionAnswering-bert/roberta-base-squad2]
+  - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion]
+  - examples/test_recurrentgemma.py::test_llm_recurrentgemma_2gpu[recurrentgemma-2b]
+  - examples/test_mamba.py::test_llm_mamba2_2gpu[mamba-codestral-7B-v0.1]
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_smooth_quant_ootb_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_awq_prequantized_tp2
+  - accuracy/test_cli_flow.py::TestLlama2_7B::test_int4_gptq_prequantized_tp2
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[disable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_tp4[enable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_1_8B::test_fp8_rowwise_tp4[disable_gemm_allreduce_plugin]
+  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[disable_reduce_fusion-enable_fp8_context_fmha]
+  - accuracy/test_cli_flow.py::TestLlama3_2_1B::test_fp8_tp2[enable_reduce_fusion-disable_fp8_context_fmha]
+  - accuracy/test_cli_flow.py::TestPhi2::test_tp2
+  - llmapi/test_llm_e2e.py::test_llmapi_quant_llama_70b
+  - llmapi/test_llm_examples.py::test_llmapi_example_distributed_autopp_tp2
+  - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8]
+  - examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2]
+  - unittest/llmapi/apps/_test_openai_multi_gpu.py -m "part0"
+  - test_e2e.py::test_trtllm_bench_mgmn
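
This added condition reinstates, on 4x H200 x86_64 machines, the TensorRT post-merge set removed from l0_dgx_h100 above. Feeding it to the hypothetical matcher sketched earlier, with made-up machine values:

cond = {
    "ranges": {"system_gpu_count": {"gte": 4, "lte": 4}},
    "wildcards": {"gpu": ["*h200*"], "linux_distribution_name": "ubuntu*",
                  "cpu": "x86_64"},
    "terms": {"stage": "post_merge", "backend": "tensorrt"},
}
machine = {"system_gpu_count": 4, "gpu": "h200-sxm",
           "linux_distribution_name": "ubuntu22.04", "cpu": "x86_64"}
print(condition_matches(cond, machine,
                        {"stage": "post_merge", "backend": "tensorrt"}))  # True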

View File

@@ -9,6 +9,7 @@ l0_gh200:
      gpu:
      - '*h200*'
      linux_distribution_name: ubuntu*
+      cpu: aarch64
  terms:
    stage: pre_merge
    backend: tensorrt
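
The cpu wildcard added here (aarch64) and in the l0_dgx_h200 hunks above (x86_64) is what keeps the two lists apart: a DGX H200 and a GH200 both match '*h200*' on the GPU name alone. Continuing the matcher sketch with made-up machine descriptors:

cond = {"wildcards": {"gpu": ["*h200*"], "linux_distribution_name": "ubuntu*",
                      "cpu": "aarch64"},
        "terms": {"stage": "pre_merge", "backend": "tensorrt"}}
run = {"stage": "pre_merge", "backend": "tensorrt"}
dgx_h200 = {"gpu": "h200-sxm", "cpu": "x86_64",
            "linux_distribution_name": "ubuntu22.04"}
gh200 = {"gpu": "gh200", "cpu": "aarch64",
         "linux_distribution_name": "ubuntu22.04"}
print(condition_matches(cond, dgx_h200, run))  # False: cpu filters it out
print(condition_matches(cond, gh200, run))     # True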

View File

@@ -461,3 +461,9 @@ test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKI
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5303573)
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5303573)
test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980)
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] SKIP (https://nvbugs/5318059)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5318087)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5318087)
+unittest/_torch/auto_deploy/integration/test_ad_build.py SKIP (https://nvbugs/5318103)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5318143)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=True] SKIP (https://nvbugs/5318143)
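
Waive entries follow a one-line format: test ID, the keyword SKIP, and a tracking bug in parentheses. A small parser for that format, inferred from the lines above (not the repo's actual tooling):

import re

WAIVE_LINE = re.compile(r"^(?P<test>\S.*?)\s+SKIP\s+\((?P<reason>[^)]*)\)\s*$")

def parse_waives(text):
    # Map each waived test ID to its tracking URL.
    waives = {}
    for line in text.splitlines():
        m = WAIVE_LINE.match(line.strip())
        if m:
            waives[m.group("test")] = m.group("reason")
    return waives

line = 'test_e2e.py::test_openai_multi_chat_example SKIP (https://nvbugs/5236980)'
print(parse_waives(line))  # {'test_e2e.py::...': 'https://nvbugs/5236980'}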