[None][ci] Remove long-running sanity check tests on GH200 (#10924) (#10969)

Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
2026-02-05 02:31:33 +08:00 · 2026-01-24 13:06:28 +08:00 · 2026-01-24 13:06:28 +08:00 · 78a008d61a
commit 78a008d61a
parent da967d0bd7
1 changed files with 56 additions and 36 deletions
--- a/tests/integration/test_lists/test-db/l0_sanity_check.yml
+++ b/tests/integration/test_lists/test-db/l0_sanity_check.yml
@@ -1,38 +1,58 @@
 version: 0.0.1
 l0_sanity_check:
-  - condition:
-      ranges:
-        system_gpu_count:
-          gte: 1
-          lte: 1
-      wildcards:
-        gpu:
-          - '*b100*'
-          - '*h100*'
-          - '*h200*'
-          - '*l40s*'
-          - '*a10*'
-          - '*gb202*'
-          - '*gb203*'
-          - '*5080*'
-          - '*5090*'
-        linux_distribution_name: ubuntu*
-    tests:
-      - llmapi/test_llm_examples.py::test_llmapi_quickstart
-      - llmapi/test_llm_examples.py::test_llmapi_example_inference
-      - llmapi/test_llm_examples.py::test_llmapi_example_inference_async
-      - llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
-      - llmapi/test_llm_examples.py::test_llmapi_example_multilora
-      - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
-      - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
-      - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
-      - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
-      - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram
-      - llmapi/test_llm_examples.py::test_llmapi_sampling
-      - llmapi/test_llm_examples.py::test_llmapi_runtime
-      - llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
-      - examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
-      - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
-      - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
-      - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
-      - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+        - '*b100*'
+        - '*h100*'
+        - '*h200*'
+        - '*l40s*'
+        - '*a10*'
+        - '*gb202*'
+        - '*gb203*'
+        - '*5080*'
+        - '*5090*'
+      linux_distribution_name: ubuntu*
+  tests:
+    - llmapi/test_llm_examples.py::test_llmapi_quickstart
+    - llmapi/test_llm_examples.py::test_llmapi_example_inference
+    - llmapi/test_llm_examples.py::test_llmapi_example_inference_async
+    - llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
+    - llmapi/test_llm_examples.py::test_llmapi_example_multilora
+    - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
+    - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
+    - llmapi/test_llm_examples.py::test_llmapi_sampling
+    - llmapi/test_llm_examples.py::test_llmapi_runtime
+    - llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
+    - examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
+    - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
+    - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
+    - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
+    - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+        - '*b100*'
+        - '*h100*'
+        - '*h200*'
+        - '*l40s*'
+        - '*a10*'
+        - '*gb202*'
+        - '*gb203*'
+        - '*5080*'
+        - '*5090*'
+      linux_distribution_name: ubuntu*
+      cpu: x86_64
+  tests:
+    # Long-running and medium-/large-size model sanity tests: limited to x86_64 hosts by the `cpu` condition above so they do not run on GH200
+    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
+    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
+    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram