tests: fix 5250460 (#4751)

Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com>
2026-01-13 22:18:36 +08:00 · 2025-05-30 10:13:45 +08:00 · 2025-05-30 10:13:45 +08:00 · 1bc3dfa490
commit 1bc3dfa490
parent de0613bd83
3 changed files with 11 additions and 4 deletions
--- a/tests/integration/defs/examples/test_commandr.py
+++ b/tests/integration/defs/examples/test_commandr.py
@@ -18,6 +18,7 @@ import os
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
                         venv_mpi_check_call)
+from defs.conftest import get_gpu_device_list
 from defs.trt_test_alternative import check_call


@@ -30,6 +31,9 @@ def test_llm_commandr_v01_single_gpu_summary(commandr_example_root,
                                             llm_venv, cmodel_dir, engine_dir,
                                             use_weight_only):
    "Build & run commandr_v01 on single gpu."
+    if "GH200" in get_gpu_device_list()[0] and not use_weight_only:
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
+
    print("Converting checkpoint...")
    dtype = 'float16'
    model_name = os.path.basename(llm_commandr_v01_model_root)
--- a/tests/integration/defs/examples/test_gemma.py
+++ b/tests/integration/defs/examples/test_gemma.py
@@ -17,8 +17,9 @@ from pathlib import Path
 import pytest
 from defs.common import (generate_summary_cmd, test_multi_lora_support,
                         venv_check_call)
-from defs.conftest import (get_device_memory, skip_fp8_pre_ada,
-                           skip_post_blackwell, skip_pre_hopper)
+from defs.conftest import (get_device_memory, get_gpu_device_list,
+                           skip_fp8_pre_ada, skip_post_blackwell,
+                           skip_pre_hopper)
 from defs.trt_test_alternative import check_call


@@ -224,6 +225,10 @@ def test_llm_gemma_1gpu_summary(batch_size, data_type, gemma_model_root,
                                llm_venv, cmodel_dir, engine_dir,
                                gemma_example_root, llm_datasets_root,
                                llm_rouge_root, test_case):
+    if "27b" in gemma_model_root and "GH200" in get_gpu_device_list(
+    )[0] and "other" in test_case:
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
+
    gemma_1gpu_summary(batch_size, data_type, gemma_model_root, llm_venv,
                       cmodel_dir, engine_dir, gemma_example_root,
                       llm_datasets_root, llm_rouge_root, test_case)
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -418,8 +418,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
 examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
-full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)
-full::GH200/examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8] SKIP (https://nvbugs/5250460)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity] SKIP (https://nvbugs/5234058)