diff --git a/tests/integration/test_lists/test-db/l0_sanity_check.yml b/tests/integration/test_lists/test-db/l0_sanity_check.yml
index f88ac77337..21aafd1e97 100644
--- a/tests/integration/test_lists/test-db/l0_sanity_check.yml
+++ b/tests/integration/test_lists/test-db/l0_sanity_check.yml
@@ -1,38 +1,58 @@
 version: 0.0.1
 l0_sanity_check:
-  - condition:
-      ranges:
-        system_gpu_count:
-          gte: 1
-          lte: 1
-      wildcards:
-        gpu:
-        - '*b100*'
-        - '*h100*'
-        - '*h200*'
-        - '*l40s*'
-        - '*a10*'
-        - '*gb202*'
-        - '*gb203*'
-        - '*5080*'
-        - '*5090*'
-        linux_distribution_name: ubuntu*
-    tests:
-    - llmapi/test_llm_examples.py::test_llmapi_quickstart
-    - llmapi/test_llm_examples.py::test_llmapi_example_inference
-    - llmapi/test_llm_examples.py::test_llmapi_example_inference_async
-    - llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
-    - llmapi/test_llm_examples.py::test_llmapi_example_multilora
-    - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
-    - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
-    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
-    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
-    - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram
-    - llmapi/test_llm_examples.py::test_llmapi_sampling
-    - llmapi/test_llm_examples.py::test_llmapi_runtime
-    - llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
-    - examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
-    - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
-    - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
-    - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
-    - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*b100*'
+      - '*h100*'
+      - '*h200*'
+      - '*l40s*'
+      - '*a10*'
+      - '*gb202*'
+      - '*gb203*'
+      - '*5080*'
+      - '*5090*'
+      linux_distribution_name: ubuntu*
+  tests:
+  - llmapi/test_llm_examples.py::test_llmapi_quickstart
+  - llmapi/test_llm_examples.py::test_llmapi_example_inference
+  - llmapi/test_llm_examples.py::test_llmapi_example_inference_async
+  - llmapi/test_llm_examples.py::test_llmapi_example_inference_async_streaming
+  - llmapi/test_llm_examples.py::test_llmapi_example_multilora
+  - llmapi/test_llm_examples.py::test_llmapi_example_guided_decoding
+  - llmapi/test_llm_examples.py::test_llmapi_example_logits_processor
+  - llmapi/test_llm_examples.py::test_llmapi_sampling
+  - llmapi/test_llm_examples.py::test_llmapi_runtime
+  - llmapi/test_llm_examples.py::test_llmapi_tensorrt_engine
+  - examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] ISOLATION
+  - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[NIXL-mha-ctx_fp16_gen_fp16]
+  - unittest/others/test_kv_cache_transceiver.py::test_kv_cache_transceiver_single_process[UCX-mha-ctx_fp16_gen_fp16]
+  - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mha]
+  - unittest/others/test_kv_cache_transceiver.py::test_cancel_request_in_transmission[mla]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*b100*'
+      - '*h100*'
+      - '*h200*'
+      - '*l40s*'
+      - '*a10*'
+      - '*gb202*'
+      - '*gb203*'
+      - '*5080*'
+      - '*5090*'
+      linux_distribution_name: ubuntu*
+      cpu: x86_64
+  tests:
+  # Don't run long-running and medium-/large-size model sanity tests on GH200
+  - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp TIMEOUT (90)
+  - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_eagle3
+  - llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_ngram
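
For context, the YAML above is a test-db selection list: each entry pairs a condition (a GPU-count range, GPU and OS wildcards, and in the new block a cpu architecture) with the tests to run when that condition matches the machine, so the second block keeps the speculative-decoding sanity tests off GH200 (aarch64) nodes whose GPU name would otherwise match '*h200*'. The sketch below is a minimal, hypothetical matcher written against that structure for illustration only; the field names mirror the file, but the actual TensorRT-LLM test-db tooling is not shown here and may evaluate conditions differently.

# Illustrative sketch only -- not the actual test-db tooling. It assumes a
# simple "system profile" dict with keys mirroring the condition fields above.
from fnmatch import fnmatch

import yaml


def condition_matches(condition, system):
    # Numeric ranges, e.g. system_gpu_count: {gte: 1, lte: 1}
    for key, bounds in condition.get("ranges", {}).items():
        value = system[key]
        if "gte" in bounds and value < bounds["gte"]:
            return False
        if "lte" in bounds and value > bounds["lte"]:
            return False
    # Wildcard fields, e.g. gpu: ['*h100*', ...], linux_distribution_name: ubuntu*,
    # cpu: x86_64 (a pattern without wildcard characters acts as an exact match)
    for key, patterns in condition.get("wildcards", {}).items():
        if not isinstance(patterns, list):
            patterns = [patterns]
        if not any(fnmatch(str(system[key]).lower(), str(p).lower()) for p in patterns):
            return False
    return True


def select_tests(test_db_path, list_name, system):
    # Collect the tests from every entry whose condition matches this machine.
    # Markers such as "TIMEOUT (90)" or "ISOLATION" stay attached to the name here.
    with open(test_db_path) as f:
        doc = yaml.safe_load(f)
    selected = []
    for entry in doc[list_name]:
        if condition_matches(entry.get("condition", {}), system):
            selected.extend(entry.get("tests", []))
    return selected


if __name__ == "__main__":
    system = {
        "system_gpu_count": 1,
        "gpu": "NVIDIA H100 PCIe",
        "linux_distribution_name": "ubuntu22.04",
        "cpu": "x86_64",
    }
    for test in select_tests(
            "tests/integration/test_lists/test-db/l0_sanity_check.yml",
            "l0_sanity_check", system):
        print(test)

Under this reading, an x86_64 H100 box would pick up the tests from both blocks, while a GH200 machine (cpu aarch64) would only get the first block, which is the intent of the added cpu: x86_64 guard.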