[None][ci] Remove long-running sanity check tests on GH200 (#10924) (#10969)

Signed-off-by: Yanchao Lu <yanchaol@nvidia.com>
This commit is contained in:
Yanchao Lu 2026-01-24 13:06:28 +08:00 committed by GitHub
parent da967d0bd7
commit 78a008d61a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,38 +1,58 @@
version: 0.0.1
l0_sanity_check:
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*b100*'
- '*h100*'
- '*h200*'
- '*l40s*'
- '*a10*'
- '*gb202*'
- '*gb203*'
- '*5080*'
- '*5090*'
linux_distribution_name: ubuntu*
tests:
- llmapi/test_llm_examples.py::test_llmapi_quickstart
- llmapi/test_llm_examples.py::test_llmapi_example_inference
- llmapi/test_llm_examples.py::test_llmapi_example_inference_async
- llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
- llmapi/test_llm_examples.py::test_llmapi_example_multilora
- llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
- llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram
- llmapi/test_llm_examples.py::test_llmapi_sampling
- llmapi/test_llm_examples.py::test_llmapi_runtime
- llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
- examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
- unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
- unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*b100*'
- '*h100*'
- '*h200*'
- '*l40s*'
- '*a10*'
- '*gb202*'
- '*gb203*'
- '*5080*'
- '*5090*'
linux_distribution_name: ubuntu*
tests:
- llmapi/test_llm_examples.py::test_llmapi_quickstart
- llmapi/test_llm_examples.py::test_llmapi_example_inference
- llmapi/test_llm_examples.py::test_llmapi_example_inference_async
- llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
- llmapi/test_llm_examples.py::test_llmapi_example_multilora
- llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
- llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
- llmapi/test_llm_examples.py::test_llmapi_sampling
- llmapi/test_llm_examples.py::test_llmapi_runtime
- llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
- examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
- unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
- unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
- unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*b100*'
- '*h100*'
- '*h200*'
- '*l40s*'
- '*a10*'
- '*gb202*'
- '*gb203*'
- '*5080*'
- '*5090*'
linux_distribution_name: ubuntu*
cpu: x86_64
tests:
# Long-running and medium-/large-size model sanity tests; the cpu: x86_64 condition above keeps them off GH200
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
- llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram