[ROCm][CI] Remove benchmarks test group and shard long test groups (#41669)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-05-23 10:31:46 -05:00
committed by GitHub
parent 5bb8d2767a
commit 2a7d5b7324
+47 -35
View File
@@ -139,19 +139,6 @@ steps:
- pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
- VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
#-------------------------------------------------------- mi250 · benchmarks ---------------------------------------------------------#
# Pipeline step for the mi250 agent pool: its single command runs
# scripts/run-benchmarks.sh, resolved relative to working_dir below.
- label: Benchmarks # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
agent_pool: mi250_1
working_dir: "/vllm-workspace/.buildkite"
# NOTE(review): presumably the step is only triggered when one of these
# paths changes -- confirm against the pipeline generator.
source_file_dependencies:
- benchmarks/
- vllm/platforms/rocm.py
commands:
- bash scripts/run-benchmarks.sh
#---------------------------------------------------------- mi250 · compile ----------------------------------------------------------#
- label: PyTorch Compilation Unit Tests # TBD
@@ -485,7 +472,7 @@ steps:
- pytest -v -s model_executor -m '(not slow_test)'
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
#---------------------------------------------------------- mi250 · models -----------------------------------------------------------#
#------------------------------------------------------ mi250 · models / basic -------------------------------------------------------#
- label: Basic Models Test (Other CPU) # TBD
timeout_in_minutes: 180
@@ -546,6 +533,8 @@ steps:
commands:
- pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
#----------------------------------------------------- mi250 · models / language -----------------------------------------------------#
- label: Language Models Test (MTEB) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
@@ -590,6 +579,8 @@ steps:
- pip freeze | grep -E 'torch'
- pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
#---------------------------------------------------- mi250 · models / multimodal ----------------------------------------------------#
- label: Multi-Modal Models (Extended Generation 2) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
@@ -976,18 +967,6 @@ steps:
#-------------------------------------------------------- mi300 · benchmarks ---------------------------------------------------------#
# Pipeline step for the mi300 agent pool: its single command runs
# scripts/run-benchmarks.sh, resolved relative to working_dir below.
- label: Benchmarks # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
agent_pool: mi300_1
# NOTE(review): presumably marks the step as non-blocking / opt-in --
# confirm the exact semantics in the pipeline generator.
optional: true
working_dir: "/vllm-workspace/.buildkite"
# NOTE(review): presumably the step is only triggered when one of these
# paths changes -- confirm against the pipeline generator.
source_file_dependencies:
- benchmarks/
- vllm/platforms/rocm.py
commands:
- bash scripts/run-benchmarks.sh
- label: Benchmarks CLI Test # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
@@ -1759,7 +1738,7 @@ steps:
- pytest -v -s -x lora/test_gptoss_tp.py
- pytest -v -s -x lora/test_qwen35_densemodel_lora.py
#---------------------------------------------------------- mi300 · models -----------------------------------------------------------#
#----------------------------------------------------- mi300 · models / language -----------------------------------------------------#
- label: Language Models Test (Extended Pooling) # TBD
timeout_in_minutes: 180
@@ -1787,6 +1766,8 @@ steps:
- pip freeze | grep -E 'torch'
- pytest -v -s models/language -m 'core_model and (not slow_test)'
#---------------------------------------------------- mi300 · models / multimodal ----------------------------------------------------#
- label: Multi-Modal Models (Extended Generation 1) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
@@ -1892,10 +1873,11 @@ steps:
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
- pytest -v -s models/multimodal/processing/test_tensor_schema.py
- label: Multi-Modal Processor (CPU) # TBD
- label: Multi-Modal Processor (CPU) %N # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
agent_pool: mi300_1
parallelism: 4
no_gpu: true
optional: true
working_dir: "/vllm-workspace/tests"
@@ -1905,7 +1887,9 @@ steps:
- tests/models/registry.py
commands:
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
#----------------------------------------------------- mi300 · models / quantized -----------------------------------------------------#
- label: Quantized Models Test # TBD
timeout_in_minutes: 180
@@ -1921,7 +1905,31 @@ steps:
commands:
- pytest -v -s models/quantization
- label: Transformers Nightly Models # TBD
#-------------------------------------------------- mi300 · models / transformers ---------------------------------------------------#
# Sharded half of the Transformers nightly checks: upgrades transformers from
# its GitHub repository (no ref pinned), then runs two pytest targets, each
# split with --num-shards / --shard-id.
# NOTE(review): %N in the label is presumably replaced by the parallel job
# number -- confirm against the Buildkite label placeholder documentation.
- label: Transformers Nightly Models (Shardable) %N # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
agent_pool: mi300_1
# NOTE(review): presumably fans the step out into 4 parallel jobs, which
# supply the shard count/index used by the commands below -- confirm.
parallelism: 4
optional: true
working_dir: "/vllm-workspace/"
# NOTE(review): presumably the step is only triggered when one of these
# paths changes -- confirm against the pipeline generator.
source_file_dependencies:
- vllm/model_executor/models/
- vllm/model_executor/model_loader/
- vllm/multimodal/
- vllm/model_executor/layers/
- vllm/v1/attention/backends/
- vllm/v1/attention/selector.py
- vllm/_aiter_ops.py
- vllm/platforms/rocm.py
- tests/models/
commands:
- pip install --upgrade git+https://github.com/huggingface/transformers
# Both invocations take their shard count and shard id from the
# BUILDKITE_PARALLEL_JOB_COUNT / BUILDKITE_PARALLEL_JOB variables.
# NOTE(review): the doubled `$$` presumably defers variable expansion until
# the job runs instead of at pipeline-upload time -- confirm.
- pytest -v -s tests/models/test_initialization.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
- pytest -v -s tests/models/multimodal/processing/ --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
- label: Transformers Nightly Models (Single) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
agent_pool: mi300_1
@@ -1940,9 +1948,7 @@ steps:
- examples/
commands:
- pip install --upgrade git+https://github.com/huggingface/transformers
- pytest -v -s tests/models/test_initialization.py
- pytest -v -s tests/models/test_transformers.py
- pytest -v -s tests/models/multimodal/processing/
- pytest -v -s tests/models/multimodal/test_mapping.py
- python3 examples/basic/offline_inference/chat.py
- python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
@@ -2391,7 +2397,7 @@ steps:
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
- DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
agent_pool: mi300_4
@@ -2593,7 +2599,7 @@ steps:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=8
#---------------------------------------------------------- mi325 · models -----------------------------------------------------------#
#----------------------------------------------------- mi325 · models / language -----------------------------------------------------#
- label: Language Models Test (Extended Generation) # TBD
timeout_in_minutes: 180
@@ -2624,6 +2630,8 @@ steps:
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
- pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
#---------------------------------------------------- mi325 · models / multimodal ----------------------------------------------------#
- label: Multi-Modal Models (Extended Pooling) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
@@ -3043,7 +3051,7 @@ steps:
commands:
- pytest -v -s kernels/moe/test_deepep_moe.py
#---------------------------------------------------------- mi355 · models -----------------------------------------------------------#
#----------------------------------------------------- mi355 · models / language -----------------------------------------------------#
- label: Language Models Test (Extended Generation) # TBD
timeout_in_minutes: 180
@@ -3111,6 +3119,8 @@ steps:
- pip freeze | grep -E 'torch'
- pytest -v -s models/language -m 'core_model and (not slow_test)'
#---------------------------------------------------- mi355 · models / multimodal ----------------------------------------------------#
- label: Multi-Modal Models (Extended Generation 1) # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
@@ -3182,6 +3192,8 @@ steps:
- pytest -v -s models/multimodal/generation/test_memory_leak.py -m core_model
- cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
#----------------------------------------------------- mi355 · models / quantized -----------------------------------------------------#
- label: Quantized Models Test # TBD
timeout_in_minutes: 180
mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]