[ROCm][CI] Cleaning and restructuring amd-ci legacy pipeline (#34839)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-19 14:30:58 -05:00
committed by GitHub
parent 9279c59a0e
commit 040a505ff5
15 changed files with 2486 additions and 3554 deletions
@@ -0,0 +1 @@
Qwen3-235B-A22B-Instruct-2507-FP8.yaml
+23 -1
View File
@@ -16,6 +16,23 @@ RAY_BASE_URL="https://raw.githubusercontent.com/ray-project/ray/master/python"
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
# ── Detect PyTorch index URL ─────────────────────────────────────────────
# The probe succeeds only when torch can be imported and torch.version.hip is
# truthy (presumably a ROCm build of PyTorch); any error output is discarded.
# A missing torch installation therefore also takes the else branch below.
if python3 -c "import torch; assert torch.version.hip" 2>/dev/null; then
# Drop the last dot-separated component of the HIP version string; what is
# left becomes the suffix of the candidate wheel-index URL.
ROCM_VER=$(python3 -c "import torch; print(torch.version.hip.rsplit('.', 1)[0])")
CANDIDATE_URL="https://download.pytorch.org/whl/rocm${ROCM_VER}"
# Probe the candidate index with a HEAD request. -f makes curl exit non-zero
# on an HTTP error status, so a missing index selects the fallback branch.
if curl -fsSL --head "${CANDIDATE_URL}/" >/dev/null 2>&1; then
TORCH_INDEX_URL="${CANDIDATE_URL}"
else
echo ">>> WARNING: ROCm ${ROCM_VER} wheel index not found at ${CANDIDATE_URL}"
echo ">>> Falling back to default PyPI (resolution may be incomplete)"
# An empty value signals "no extra index" to the rest of the script.
TORCH_INDEX_URL=""
fi
else
# Probe failed: use the cu129 wheel index.
TORCH_INDEX_URL="https://download.pytorch.org/whl/cu129"
fi
# Report the selected index; an empty value is printed as "PyPI default".
echo ">>> Using PyTorch index: ${TORCH_INDEX_URL:-PyPI default}"
# Fetch all Ray requirement files used in the LLM depset pipeline
echo ">>> Fetching Ray requirement files"
RAY_FILES=(
@@ -116,6 +133,11 @@ echo "============================================================"
echo ">>> Resolving: Can Ray generate compatible lock files?"
echo "============================================================"
# Build the optional --extra-index-url arguments as an array so that an empty
# TORCH_INDEX_URL contributes no words at all to the resolver command line.
# NOTE(review): if this script runs with `set -u` on bash older than 4.4,
# expanding an empty array as "${EXTRA_INDEX_ARGS[@]}" is reported as an
# unbound variable -- confirm the shell options / bash version in use.
EXTRA_INDEX_ARGS=()
if [[ -n "${TORCH_INDEX_URL}" ]]; then
EXTRA_INDEX_ARGS+=(--extra-index-url "${TORCH_INDEX_URL}")
fi
set +e
uv pip compile \
"${WORK_DIR}/requirements.txt" \
@@ -126,7 +148,7 @@ uv pip compile \
-c "${WORK_DIR}/vllm-constraints.txt" \
--python-version 3.12 \
--python-platform x86_64-manylinux_2_31 \
--extra-index-url https://download.pytorch.org/whl/cu129 \
"${EXTRA_INDEX_ARGS[@]}" \
--index-strategy unsafe-best-match \
--unsafe-package setuptools \
--unsafe-package ray \
@@ -1,11 +1,14 @@
#!/usr/bin/env bash
set -euxo pipefail
# Nightly e2e test for prefetch offloading with a MoE model.
# Runs DeepSeek-V2-Lite with prefetch offloading of MoE expert weights
# and validates GSM8K accuracy matches baseline (no offloading).
#
# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
#
# Environment variables:
# ATTENTION_BACKEND - attention backend to use (e.g., FLASH_ATTN,
# ROCM_ATTN, FLASHINFER). If unset or empty, the vLLM default is used.
THRESHOLD=${1:-0.25}
NUM_Q=${2:-1319}
PORT=${3:-8030}
@@ -22,6 +25,14 @@ wait_for_server() {
MODEL="deepseek-ai/DeepSeek-V2-Lite"
# ── Build optional vllm serve flags ─────────────────────────────────────
# Optional flags are collected in an array so the flag and its value stay two
# separate words. The array remains empty when ATTENTION_BACKEND is unset or
# empty (the ":-" default keeps the test safe under `set -u`).
EXTRA_ARGS=()
if [[ -n "${ATTENTION_BACKEND:-}" ]]; then
echo "Using attention backend: ${ATTENTION_BACKEND}"
EXTRA_ARGS+=(--attention-backend "${ATTENTION_BACKEND}")
fi
cleanup() {
if [[ -n "${SERVER_PID:-}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then
kill "${SERVER_PID}" 2>/dev/null || true
@@ -40,7 +51,8 @@ vllm serve "$MODEL" \
--offload-num-in-group 2 \
--offload-prefetch-step 1 \
--offload-params w13_weight w2_weight \
--port "$PORT" &
--port "$PORT" \
${EXTRA_ARGS+"${EXTRA_ARGS[@]}"} &
SERVER_PID=$!
wait_for_server "$PORT"
+2373 -3544
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -59,7 +59,7 @@ steps:
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
- pytest -s -v tests/compile/passes/distributed
- label: Fusion and Compile Unit Tests (B200)
- label: Fusion and Compile Unit Tests (2xB200)
timeout_in_minutes: 20
working_dir: "/vllm-workspace/"
device: b200
@@ -1,3 +1,3 @@
# GFX942 model configurations for GPQA evaluation
# Tests different environment variable combinations
gpt-oss-20b-rocm-baseline.yaml
gpt-oss-20b-rocm-baseline.yaml
@@ -0,0 +1,12 @@
# Evaluation config for deepseek-ai/DeepSeek-R1 served with data-parallel
# size 8 and expert parallelism enabled.
# NOTE(review): presumably consumed by the GSM8K correctness test, with
# accuracy_threshold as the minimum passing score -- confirm against the loader.
model_name: "deepseek-ai/DeepSeek-R1"
accuracy_threshold: 0.95
num_questions: 1319
num_fewshot: 5
startup_max_wait_seconds: 1200
# Folded scalar: ">-" joins the indented lines with single spaces and strips
# the trailing newline, yielding one command-line string. The JSON value is
# single-quoted so it survives shell-style splitting as one argument.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --data-parallel-size 8
  --enable-expert-parallel
  --attention-backend=TRITON_ATTN
  --speculative-config '{"method":"mtp","num_speculative_tokens":3}'
@@ -0,0 +1,12 @@
# Evaluation config for deepseek-ai/DeepSeek-R1 served with tensor-parallel
# size 8 and expert parallelism enabled.
# NOTE(review): presumably consumed by the GSM8K correctness test, with
# accuracy_threshold as the minimum passing score -- confirm against the loader.
model_name: "deepseek-ai/DeepSeek-R1"
accuracy_threshold: 0.95
num_questions: 1319
num_fewshot: 5
startup_max_wait_seconds: 1200
# Folded scalar: ">-" joins the indented lines with single spaces and strips
# the trailing newline, yielding one command-line string. The JSON value is
# single-quoted so it survives shell-style splitting as one argument.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --tensor-parallel-size 8
  --enable-expert-parallel
  --attention-backend=TRITON_ATTN
  --speculative-config '{"method":"mtp","num_speculative_tokens":3}'
@@ -0,0 +1,12 @@
# Evaluation config for deepseek-ai/DeepSeek-V3.2 served with data-parallel
# size 8 and expert parallelism enabled.
# NOTE(review): presumably consumed by the GSM8K correctness test, with
# accuracy_threshold as the minimum passing score -- confirm against the loader.
model_name: "deepseek-ai/DeepSeek-V3.2"
accuracy_threshold: 0.95
num_questions: 1319
num_fewshot: 5
startup_max_wait_seconds: 1200
# Folded scalar: ">-" joins the indented lines with single spaces and strips
# the trailing newline, yielding one command-line string. The JSON value is
# single-quoted so it survives shell-style splitting as one argument.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --data-parallel-size 8
  --enable-expert-parallel
  --attention-backend=TRITON_ATTN
  --speculative-config '{"method":"mtp","num_speculative_tokens":3}'
@@ -0,0 +1,12 @@
# Evaluation config for deepseek-ai/DeepSeek-V3.2 served with tensor-parallel
# size 8 and expert parallelism enabled.
# NOTE(review): presumably consumed by the GSM8K correctness test, with
# accuracy_threshold as the minimum passing score -- confirm against the loader.
model_name: "deepseek-ai/DeepSeek-V3.2"
accuracy_threshold: 0.95
num_questions: 1319
num_fewshot: 5
startup_max_wait_seconds: 1200
# Folded scalar: ">-" joins the indented lines with single spaces and strips
# the trailing newline, yielding one command-line string. The JSON value is
# single-quoted so it survives shell-style splitting as one argument.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --tensor-parallel-size 8
  --enable-expert-parallel
  --attention-backend=TRITON_ATTN
  --speculative-config '{"method":"mtp","num_speculative_tokens":3}'
@@ -0,0 +1,4 @@
DeepSeek-R1-TP_MI325.yaml
DeepSeek-R1-DP_MI325.yaml
DeepSeek-V3.2-TP_MI325.yaml
DeepSeek-V3.2-DP_MI325.yaml
@@ -64,6 +64,16 @@ def test_gsm8k_correctness(config_filename):
"Marlin kernels are not supported."
)
# TODO(akaratza): Enable DeepSeek-V3.2 and DeepSeek-R1 on ROCm platforms
if current_platform.is_rocm() and (
"deepseek-ai/DeepSeek-V3.2" in eval_config["model_name"]
or "deepseek-ai/DeepSeek-R1" in eval_config["model_name"]
):
pytest.skip(
"Skipping DeepSeek-V3.2 and DeepSeek-R1 on ROCm platforms "
"due to agent pool disk space issues and pod evictions."
)
# Parse server arguments from config (use shlex to handle quoted strings)
server_args_str = eval_config.get("server_args", "")
server_args = shlex.split(server_args_str) if server_args_str else []
+6
View File
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
def test_mi3xx_moe() -> None:
    """Placeholder for the Mi3xx MoE quantization tests.

    Makes no assertions; it only prints a TODO reminder to stdout.
    """
    print("TODO: add tests for Mi3xx MoE quantization")
@@ -31,7 +31,7 @@ class TestAiterMlaFp8SupportCheck:
# Should return False without raising
with patch(
"vllm._aiter_ops.inspect.signature",
"inspect.signature",
side_effect=ImportError("No module"),
):
result = _check_aiter_mla_fp8_support()
@@ -46,7 +46,7 @@ class TestAiterMlaFp8SupportCheck:
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
with patch(
"vllm._aiter_ops.inspect.signature",
"inspect.signature",
side_effect=ModuleNotFoundError("Module not found"),
):
# Should return False without raising
@@ -63,7 +63,7 @@ class TestAiterMlaFp8SupportCheck:
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
with patch(
"vllm._aiter_ops.inspect.signature",
"inspect.signature",
side_effect=AttributeError("No attribute"),
):
assert _check_aiter_mla_fp8_support() is False
@@ -78,7 +78,7 @@ class TestAiterMlaFp8SupportCheck:
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
with patch(
"vllm._aiter_ops.inspect.signature",
"inspect.signature",
side_effect=ValueError("No signature"),
):
assert _check_aiter_mla_fp8_support() is False
@@ -93,7 +93,7 @@ class TestAiterMlaFp8SupportCheck:
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
with patch(
"vllm._aiter_ops.inspect.signature",
"inspect.signature",
side_effect=TypeError("Not a callable"),
):
assert _check_aiter_mla_fp8_support() is False