mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[ROCm][CI] Remove benchmarks test group and shard long test groups (#41669)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
+47
-35
@@ -139,19 +139,6 @@ steps:
|
||||
- pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
|
||||
- VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
|
||||
|
||||
#-------------------------------------------------------- mi250 · benchmarks ---------------------------------------------------------#
|
||||
|
||||
- label: Benchmarks # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
|
||||
agent_pool: mi250_1
|
||||
working_dir: "/vllm-workspace/.buildkite"
|
||||
source_file_dependencies:
|
||||
- benchmarks/
|
||||
- vllm/platforms/rocm.py
|
||||
commands:
|
||||
- bash scripts/run-benchmarks.sh
|
||||
|
||||
#---------------------------------------------------------- mi250 · compile ----------------------------------------------------------#
|
||||
|
||||
- label: PyTorch Compilation Unit Tests # TBD
|
||||
@@ -485,7 +472,7 @@ steps:
|
||||
- pytest -v -s model_executor -m '(not slow_test)'
|
||||
- pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py
|
||||
|
||||
#---------------------------------------------------------- mi250 · models -----------------------------------------------------------#
|
||||
#------------------------------------------------------ mi250 · models / basic -------------------------------------------------------#
|
||||
|
||||
- label: Basic Models Test (Other CPU) # TBD
|
||||
timeout_in_minutes: 180
|
||||
@@ -546,6 +533,8 @@ steps:
|
||||
commands:
|
||||
- pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py
|
||||
|
||||
#----------------------------------------------------- mi250 · models / language -----------------------------------------------------#
|
||||
|
||||
- label: Language Models Test (MTEB) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
|
||||
@@ -590,6 +579,8 @@ steps:
|
||||
- pip freeze | grep -E 'torch'
|
||||
- pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
|
||||
|
||||
#---------------------------------------------------- mi250 · models / multimodal ----------------------------------------------------#
|
||||
|
||||
- label: Multi-Modal Models (Extended Generation 2) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250]
|
||||
@@ -976,18 +967,6 @@ steps:
|
||||
|
||||
#-------------------------------------------------------- mi300 · benchmarks ---------------------------------------------------------#
|
||||
|
||||
- label: Benchmarks # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
agent_pool: mi300_1
|
||||
optional: true
|
||||
working_dir: "/vllm-workspace/.buildkite"
|
||||
source_file_dependencies:
|
||||
- benchmarks/
|
||||
- vllm/platforms/rocm.py
|
||||
commands:
|
||||
- bash scripts/run-benchmarks.sh
|
||||
|
||||
- label: Benchmarks CLI Test # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
@@ -1759,7 +1738,7 @@ steps:
|
||||
- pytest -v -s -x lora/test_gptoss_tp.py
|
||||
- pytest -v -s -x lora/test_qwen35_densemodel_lora.py
|
||||
|
||||
#---------------------------------------------------------- mi300 · models -----------------------------------------------------------#
|
||||
#----------------------------------------------------- mi300 · models / language -----------------------------------------------------#
|
||||
|
||||
- label: Language Models Test (Extended Pooling) # TBD
|
||||
timeout_in_minutes: 180
|
||||
@@ -1787,6 +1766,8 @@ steps:
|
||||
- pip freeze | grep -E 'torch'
|
||||
- pytest -v -s models/language -m 'core_model and (not slow_test)'
|
||||
|
||||
#---------------------------------------------------- mi300 · models / multimodal ----------------------------------------------------#
|
||||
|
||||
- label: Multi-Modal Models (Extended Generation 1) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
@@ -1892,10 +1873,11 @@ steps:
|
||||
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
|
||||
- pytest -v -s models/multimodal/processing/test_tensor_schema.py
|
||||
|
||||
- label: Multi-Modal Processor (CPU) # TBD
|
||||
- label: Multi-Modal Processor (CPU) %N # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
agent_pool: mi300_1
|
||||
parallelism: 4
|
||||
no_gpu: true
|
||||
optional: true
|
||||
working_dir: "/vllm-workspace/tests"
|
||||
@@ -1905,7 +1887,9 @@ steps:
|
||||
- tests/models/registry.py
|
||||
commands:
|
||||
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
|
||||
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
|
||||
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
|
||||
|
||||
#----------------------------------------------------- mi300 · models / quantized -----------------------------------------------------#
|
||||
|
||||
- label: Quantized Models Test # TBD
|
||||
timeout_in_minutes: 180
|
||||
@@ -1921,7 +1905,31 @@ steps:
|
||||
commands:
|
||||
- pytest -v -s models/quantization
|
||||
|
||||
- label: Transformers Nightly Models # TBD
|
||||
#-------------------------------------------------- mi300 · models / transformers ---------------------------------------------------#
|
||||
|
||||
- label: Transformers Nightly Models (Shardable) %N # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
agent_pool: mi300_1
|
||||
parallelism: 4
|
||||
optional: true
|
||||
working_dir: "/vllm-workspace/"
|
||||
source_file_dependencies:
|
||||
- vllm/model_executor/models/
|
||||
- vllm/model_executor/model_loader/
|
||||
- vllm/multimodal/
|
||||
- vllm/model_executor/layers/
|
||||
- vllm/v1/attention/backends/
|
||||
- vllm/v1/attention/selector.py
|
||||
- vllm/_aiter_ops.py
|
||||
- vllm/platforms/rocm.py
|
||||
- tests/models/
|
||||
commands:
|
||||
- pip install --upgrade git+https://github.com/huggingface/transformers
|
||||
- pytest -v -s tests/models/test_initialization.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
|
||||
- pytest -v -s tests/models/multimodal/processing/ --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
|
||||
|
||||
- label: Transformers Nightly Models (Single) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
agent_pool: mi300_1
|
||||
@@ -1940,9 +1948,7 @@ steps:
|
||||
- examples/
|
||||
commands:
|
||||
- pip install --upgrade git+https://github.com/huggingface/transformers
|
||||
- pytest -v -s tests/models/test_initialization.py
|
||||
- pytest -v -s tests/models/test_transformers.py
|
||||
- pytest -v -s tests/models/multimodal/processing/
|
||||
- pytest -v -s tests/models/multimodal/test_mapping.py
|
||||
- python3 examples/basic/offline_inference/chat.py
|
||||
- python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl
|
||||
@@ -2391,7 +2397,7 @@ steps:
|
||||
- uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt
|
||||
- DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
|
||||
|
||||
- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
|
||||
- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300]
|
||||
agent_pool: mi300_4
|
||||
@@ -2593,7 +2599,7 @@ steps:
|
||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=8
|
||||
|
||||
#---------------------------------------------------------- mi325 · models -----------------------------------------------------------#
|
||||
#----------------------------------------------------- mi325 · models / language -----------------------------------------------------#
|
||||
|
||||
- label: Language Models Test (Extended Generation) # TBD
|
||||
timeout_in_minutes: 180
|
||||
@@ -2624,6 +2630,8 @@ steps:
|
||||
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
|
||||
- pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
|
||||
|
||||
#---------------------------------------------------- mi325 · models / multimodal ----------------------------------------------------#
|
||||
|
||||
- label: Multi-Modal Models (Extended Pooling) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325]
|
||||
@@ -3043,7 +3051,7 @@ steps:
|
||||
commands:
|
||||
- pytest -v -s kernels/moe/test_deepep_moe.py
|
||||
|
||||
#---------------------------------------------------------- mi355 · models -----------------------------------------------------------#
|
||||
#----------------------------------------------------- mi355 · models / language -----------------------------------------------------#
|
||||
|
||||
- label: Language Models Test (Extended Generation) # TBD
|
||||
timeout_in_minutes: 180
|
||||
@@ -3111,6 +3119,8 @@ steps:
|
||||
- pip freeze | grep -E 'torch'
|
||||
- pytest -v -s models/language -m 'core_model and (not slow_test)'
|
||||
|
||||
#---------------------------------------------------- mi355 · models / multimodal ----------------------------------------------------#
|
||||
|
||||
- label: Multi-Modal Models (Extended Generation 1) # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
|
||||
@@ -3182,6 +3192,8 @@ steps:
|
||||
- pytest -v -s models/multimodal/generation/test_memory_leak.py -m core_model
|
||||
- cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
|
||||
|
||||
#----------------------------------------------------- mi355 · models / quantized -----------------------------------------------------#
|
||||
|
||||
- label: Quantized Models Test # TBD
|
||||
timeout_in_minutes: 180
|
||||
mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]
|
||||
|
||||
Reference in New Issue
Block a user