diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index e53ca5023dc..3cadab548fb 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -139,19 +139,6 @@ steps: - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)' -#-------------------------------------------------------- mi250 · benchmarks ---------------------------------------------------------# - -- label: Benchmarks # TBD - timeout_in_minutes: 180 - mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250] - agent_pool: mi250_1 - working_dir: "/vllm-workspace/.buildkite" - source_file_dependencies: - - benchmarks/ - - vllm/platforms/rocm.py - commands: - - bash scripts/run-benchmarks.sh - #---------------------------------------------------------- mi250 · compile ----------------------------------------------------------# - label: PyTorch Compilation Unit Tests # TBD @@ -485,7 +472,7 @@ steps: - pytest -v -s model_executor -m '(not slow_test)' - pytest -v -s entrypoints/openai/completion/test_tensorizer_entrypoint.py -#---------------------------------------------------------- mi250 · models -----------------------------------------------------------# +#------------------------------------------------------ mi250 · models / basic -------------------------------------------------------# - label: Basic Models Test (Other CPU) # TBD timeout_in_minutes: 180 @@ -546,6 +533,8 @@ steps: commands: - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py +#----------------------------------------------------- mi250 · models / language -----------------------------------------------------# + - label: Language Models Test (MTEB) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250] @@ -590,6 +579,8 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB +#---------------------------------------------------- mi250 · models / multimodal ----------------------------------------------------# + - label: Multi-Modal Models (Extended Generation 2) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx90anightly, amdmi250] @@ -976,18 +967,6 @@ steps: #-------------------------------------------------------- mi300 · benchmarks ---------------------------------------------------------# -- label: Benchmarks # TBD - timeout_in_minutes: 180 - mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] - agent_pool: mi300_1 - optional: true - working_dir: "/vllm-workspace/.buildkite" - source_file_dependencies: - - benchmarks/ - - vllm/platforms/rocm.py - commands: - - bash scripts/run-benchmarks.sh - - label: Benchmarks CLI Test # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] @@ -1759,7 +1738,7 @@ steps: - pytest -v -s -x lora/test_gptoss_tp.py - pytest -v -s -x lora/test_qwen35_densemodel_lora.py -#---------------------------------------------------------- mi300 · models -----------------------------------------------------------# +#----------------------------------------------------- mi300 · models / language -----------------------------------------------------# - label: Language Models Test (Extended Pooling) # TBD timeout_in_minutes: 180 @@ -1787,6 +1766,8 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and (not slow_test)' +#---------------------------------------------------- mi300 · models / multimodal ----------------------------------------------------# + - label: Multi-Modal Models (Extended Generation 1) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] @@ -1892,10 +1873,11 @@ steps: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal/processing/test_tensor_schema.py -- label: Multi-Modal Processor (CPU) # TBD +- label: Multi-Modal Processor (CPU) %N # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] agent_pool: mi300_1 + parallelism: 4 no_gpu: true optional: true working_dir: "/vllm-workspace/tests" @@ -1905,7 +1887,9 @@ steps: - tests/models/registry.py commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py + - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB + +#----------------------------------------------------- mi300 · models / quantized -----------------------------------------------------# - label: Quantized Models Test # TBD timeout_in_minutes: 180 @@ -1921,7 +1905,31 @@ steps: commands: - pytest -v -s models/quantization -- label: Transformers Nightly Models # TBD +#-------------------------------------------------- mi300 · models / transformers ---------------------------------------------------# + +- label: Transformers Nightly Models (Shardable) %N # TBD + timeout_in_minutes: 180 + mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] + agent_pool: mi300_1 + parallelism: 4 + optional: true + working_dir: "/vllm-workspace/" + source_file_dependencies: + - vllm/model_executor/models/ + - vllm/model_executor/model_loader/ + - vllm/multimodal/ + - vllm/model_executor/layers/ + - vllm/v1/attention/backends/ + - vllm/v1/attention/selector.py + - vllm/_aiter_ops.py + - vllm/platforms/rocm.py + - tests/models/ + commands: + - pip install --upgrade git+https://github.com/huggingface/transformers + - pytest -v -s tests/models/test_initialization.py --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB + - pytest -v -s tests/models/multimodal/processing/ --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB + +- label: Transformers Nightly Models (Single) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] agent_pool: mi300_1 @@ -1940,9 +1948,7 @@ steps: - examples/ commands: - pip install --upgrade git+https://github.com/huggingface/transformers - - pytest -v -s tests/models/test_initialization.py - pytest -v -s tests/models/test_transformers.py - - pytest -v -s tests/models/multimodal/processing/ - pytest -v -s tests/models/multimodal/test_mapping.py - python3 examples/basic/offline_inference/chat.py - python3 examples/generate/multimodal/vision_language_offline.py --model-type qwen2_5_vl @@ -2391,7 +2397,7 @@ steps: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh -- label: Hyrbid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD +- label: Hybrid SSM NixlConnector PD accuracy tests (4 GPUs) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi300] agent_pool: mi300_4 @@ -2593,7 +2599,7 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-rocm.txt --tp-size=8 -#---------------------------------------------------------- mi325 · models -----------------------------------------------------------# +#----------------------------------------------------- mi325 · models / language -----------------------------------------------------# - label: Language Models Test (Extended Generation) # TBD timeout_in_minutes: 180 @@ -2624,6 +2630,8 @@ steps: - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0' - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB +#---------------------------------------------------- mi325 · models / multimodal ----------------------------------------------------# + - label: Multi-Modal Models (Extended Pooling) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx942nightly, amdmi325] @@ -3043,7 +3051,7 @@ steps: commands: - pytest -v -s kernels/moe/test_deepep_moe.py -#---------------------------------------------------------- mi355 · models -----------------------------------------------------------# +#----------------------------------------------------- mi355 · models / language -----------------------------------------------------# - label: Language Models Test (Extended Generation) # TBD timeout_in_minutes: 180 @@ -3111,6 +3119,8 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and (not slow_test)' +#---------------------------------------------------- mi355 · models / multimodal ----------------------------------------------------# + - label: Multi-Modal Models (Extended Generation 1) # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355] @@ -3182,6 +3192,8 @@ steps: - pytest -v -s models/multimodal/generation/test_memory_leak.py -m core_model - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model +#----------------------------------------------------- mi355 · models / quantized -----------------------------------------------------# + - label: Quantized Models Test # TBD timeout_in_minutes: 180 mirror_hardwares: [amdexperimental, amdproduction, amdgfx950nightly, amdmi355]