mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[ROCm][CI] Cleaning and restructuring amd-ci legacy pipeline (#34839)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -0,0 +1 @@
|
||||
Qwen3-235B-A22B-Instruct-2507-FP8.yaml
|
||||
@@ -16,6 +16,23 @@ RAY_BASE_URL="https://raw.githubusercontent.com/ray-project/ray/master/python"
|
||||
WORK_DIR=$(mktemp -d)
|
||||
trap 'rm -rf "$WORK_DIR"' EXIT
|
||||
|
||||
# ── Detect PyTorch index URL ─────────────────────────────────────────────
|
||||
|
||||
#######################################
# Pick the PyTorch wheel index that matches the installed torch build.
#
# A ROCm build of torch (torch.version.hip is truthy) maps to
# https://download.pytorch.org/whl/rocm<major.minor>, but only if that index
# answers a HEAD request; otherwise the index is left empty and a warning is
# printed. Any other outcome of the torch probe (non-ROCm build, torch or
# python3 unavailable) selects the cu129 index.
#
# Globals written:
#   ROCM_VER        - hip version with its last dot-component removed
#                     (ROCm builds only)
#   CANDIDATE_URL   - ROCm index URL that was probed (ROCm builds only)
#   TORCH_INDEX_URL - selected index URL, or "" when no ROCm index was found
# Outputs:
#   Warnings on stderr when the ROCm index cannot be reached.
#######################################
detect_torch_index_url() {
  if python3 -c "import torch; assert torch.version.hip" 2>/dev/null; then
    ROCM_VER=$(python3 -c "import torch; print(torch.version.hip.rsplit('.', 1)[0])")
    CANDIDATE_URL="https://download.pytorch.org/whl/rocm${ROCM_VER}"
    # Bounded probe: without timeouts a stalled connection would hang the job.
    if curl -fsSL --head --connect-timeout 10 --max-time 30 \
      "${CANDIDATE_URL}/" >/dev/null 2>&1; then
      TORCH_INDEX_URL="${CANDIDATE_URL}"
    else
      # Diagnostics go to stderr so they never mix with captured stdout.
      echo ">>> WARNING: ROCm ${ROCM_VER} wheel index not found at ${CANDIDATE_URL}" >&2
      echo ">>> Falling back to default PyPI (resolution may be incomplete)" >&2
      TORCH_INDEX_URL=""
    fi
  else
    TORCH_INDEX_URL="https://download.pytorch.org/whl/cu129"
  fi
}

detect_torch_index_url
echo ">>> Using PyTorch index: ${TORCH_INDEX_URL:-PyPI default}"
|
||||
|
||||
# Fetch all Ray requirement files used in the LLM depset pipeline
|
||||
echo ">>> Fetching Ray requirement files"
|
||||
RAY_FILES=(
|
||||
@@ -116,6 +133,11 @@ echo "============================================================"
|
||||
echo ">>> Resolving: Can Ray generate compatible lock files?"
|
||||
echo "============================================================"
|
||||
|
||||
# Optional index arguments for the "uv pip compile" call below. Built as an
# array so that an empty TORCH_INDEX_URL contributes no arguments at all.
EXTRA_INDEX_ARGS=()
|
||||
# TORCH_INDEX_URL is empty when the ROCm wheel index probe failed; in that
# case no --extra-index-url is added.
if [[ -n "${TORCH_INDEX_URL}" ]]; then
|
||||
EXTRA_INDEX_ARGS+=(--extra-index-url "${TORCH_INDEX_URL}")
|
||||
fi
|
||||
|
||||
set +e
|
||||
uv pip compile \
|
||||
"${WORK_DIR}/requirements.txt" \
|
||||
@@ -126,7 +148,7 @@ uv pip compile \
|
||||
-c "${WORK_DIR}/vllm-constraints.txt" \
|
||||
--python-version 3.12 \
|
||||
--python-platform x86_64-manylinux_2_31 \
|
||||
--extra-index-url https://download.pytorch.org/whl/cu129 \
|
||||
"${EXTRA_INDEX_ARGS[@]}" \
|
||||
--index-strategy unsafe-best-match \
|
||||
--unsafe-package setuptools \
|
||||
--unsafe-package ray \
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euxo pipefail
|
||||
|
||||
# Nightly e2e test for prefetch offloading with a MoE model.
|
||||
# Runs DeepSeek-V2-Lite with prefetch offloading of MoE expert weights
|
||||
# and validates GSM8K accuracy matches baseline (no offloading).
|
||||
#
|
||||
# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
|
||||
#
|
||||
# Environment variables:
|
||||
# ATTENTION_BACKEND - attention backend to use (e.g., FLASH_ATTN,
|
||||
# ROCM_ATTN, FLASHINFER). If unset, uses vllm default.
|
||||
THRESHOLD=${1:-0.25}
|
||||
NUM_Q=${2:-1319}
|
||||
PORT=${3:-8030}
|
||||
@@ -22,6 +25,14 @@ wait_for_server() {
|
||||
|
||||
MODEL="deepseek-ai/DeepSeek-V2-Lite"
|
||||
|
||||
# ── Build optional vllm serve flags ─────────────────────────────────────
|
||||
|
||||
# Optional flags appended to the "vllm serve" command. Kept as an array so the
# flag and its value stay separate words when expanded.
EXTRA_ARGS=()
|
||||
# Add the flag only when ATTENTION_BACKEND is set and non-empty; the ":-"
# default keeps this test from failing under "set -u" when it is unset.
if [[ -n "${ATTENTION_BACKEND:-}" ]]; then
|
||||
echo "Using attention backend: ${ATTENTION_BACKEND}"
|
||||
EXTRA_ARGS+=(--attention-backend "${ATTENTION_BACKEND}")
|
||||
fi
|
||||
|
||||
cleanup() {
|
||||
if [[ -n "${SERVER_PID:-}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then
|
||||
kill "${SERVER_PID}" 2>/dev/null || true
|
||||
@@ -40,7 +51,8 @@ vllm serve "$MODEL" \
|
||||
--offload-num-in-group 2 \
|
||||
--offload-prefetch-step 1 \
|
||||
--offload-params w13_weight w2_weight \
|
||||
--port "$PORT" &
|
||||
--port "$PORT" \
|
||||
${EXTRA_ARGS+"${EXTRA_ARGS[@]}"} &
|
||||
SERVER_PID=$!
|
||||
wait_for_server "$PORT"
|
||||
|
||||
|
||||
+2373
-3544
File diff suppressed because it is too large
Load Diff
@@ -59,7 +59,7 @@ steps:
|
||||
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
|
||||
- pytest -s -v tests/compile/passes/distributed
|
||||
|
||||
- label: Fusion and Compile Unit Tests (B200)
|
||||
- label: Fusion and Compile Unit Tests (2xB200)
|
||||
timeout_in_minutes: 20
|
||||
working_dir: "/vllm-workspace/"
|
||||
device: b200
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
# GFX942 model configurations for GPQA evaluation
|
||||
# Tests different environment variable combinations
|
||||
gpt-oss-20b-rocm-baseline.yaml
|
||||
gpt-oss-20b-rocm-baseline.yaml
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
model_name: "deepseek-ai/DeepSeek-R1"
|
||||
accuracy_threshold: 0.95
|
||||
num_questions: 1319
|
||||
num_fewshot: 5
|
||||
startup_max_wait_seconds: 1200
|
||||
server_args: >-
|
||||
--enforce-eager
|
||||
--max-model-len 4096
|
||||
--data-parallel-size 8
|
||||
--enable-expert-parallel
|
||||
--attention-backend=TRITON_ATTN
|
||||
--speculative-config '{"method":"mtp","num_speculative_tokens":3}'
|
||||
@@ -0,0 +1,12 @@
|
||||
model_name: "deepseek-ai/DeepSeek-R1"
|
||||
accuracy_threshold: 0.95
|
||||
num_questions: 1319
|
||||
num_fewshot: 5
|
||||
startup_max_wait_seconds: 1200
|
||||
server_args: >-
|
||||
--enforce-eager
|
||||
--max-model-len 4096
|
||||
--tensor-parallel-size 8
|
||||
--enable-expert-parallel
|
||||
--attention-backend=TRITON_ATTN
|
||||
--speculative-config '{"method":"mtp","num_speculative_tokens":3}'
|
||||
@@ -0,0 +1,12 @@
|
||||
model_name: "deepseek-ai/DeepSeek-V3.2"
|
||||
accuracy_threshold: 0.95
|
||||
num_questions: 1319
|
||||
num_fewshot: 5
|
||||
startup_max_wait_seconds: 1200
|
||||
server_args: >-
|
||||
--enforce-eager
|
||||
--max-model-len 4096
|
||||
--data-parallel-size 8
|
||||
--enable-expert-parallel
|
||||
--attention-backend=TRITON_ATTN
|
||||
--speculative-config '{"method":"mtp","num_speculative_tokens":3}'
|
||||
@@ -0,0 +1,12 @@
|
||||
model_name: "deepseek-ai/DeepSeek-V3.2"
|
||||
accuracy_threshold: 0.95
|
||||
num_questions: 1319
|
||||
num_fewshot: 5
|
||||
startup_max_wait_seconds: 1200
|
||||
server_args: >-
|
||||
--enforce-eager
|
||||
--max-model-len 4096
|
||||
--tensor-parallel-size 8
|
||||
--enable-expert-parallel
|
||||
--attention-backend=TRITON_ATTN
|
||||
--speculative-config '{"method":"mtp","num_speculative_tokens":3}'
|
||||
@@ -0,0 +1,4 @@
|
||||
DeepSeek-R1-TP_MI325.yaml
|
||||
DeepSeek-R1-DP_MI325.yaml
|
||||
DeepSeek-V3.2-TP_MI325.yaml
|
||||
DeepSeek-V3.2-DP_MI325.yaml
|
||||
@@ -64,6 +64,16 @@ def test_gsm8k_correctness(config_filename):
|
||||
"Marlin kernels are not supported."
|
||||
)
|
||||
|
||||
# TODO(akaratza): Enable DeepSeek-V3.2 and DeepSeek-R1 on ROCm platforms
|
||||
if current_platform.is_rocm() and (
|
||||
"deepseek-ai/DeepSeek-V3.2" in eval_config["model_name"]
|
||||
or "deepseek-ai/DeepSeek-R1" in eval_config["model_name"]
|
||||
):
|
||||
pytest.skip(
|
||||
"Skipping DeepSeek-V3.2 and DeepSeek-R1 on ROCm platforms "
|
||||
"due to agent pool disk space issues and pod evictions."
|
||||
)
|
||||
|
||||
# Parse server arguments from config (use shlex to handle quoted strings)
|
||||
server_args_str = eval_config.get("server_args", "")
|
||||
server_args = shlex.split(server_args_str) if server_args_str else []
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
|
||||
def test_mi3xx_moe():
    """Placeholder test for Mi3xx MoE quantization.

    Performs no checks yet: it only prints a TODO reminder and returns
    ``None``, so it always passes.
    """
    print("TODO: add tests for Mi3xx MoE quantization")
|
||||
@@ -31,7 +31,7 @@ class TestAiterMlaFp8SupportCheck:
|
||||
|
||||
# Should return False without raising
|
||||
with patch(
|
||||
"vllm._aiter_ops.inspect.signature",
|
||||
"inspect.signature",
|
||||
side_effect=ImportError("No module"),
|
||||
):
|
||||
result = _check_aiter_mla_fp8_support()
|
||||
@@ -46,7 +46,7 @@ class TestAiterMlaFp8SupportCheck:
|
||||
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
|
||||
|
||||
with patch(
|
||||
"vllm._aiter_ops.inspect.signature",
|
||||
"inspect.signature",
|
||||
side_effect=ModuleNotFoundError("Module not found"),
|
||||
):
|
||||
# Should return False without raising
|
||||
@@ -63,7 +63,7 @@ class TestAiterMlaFp8SupportCheck:
|
||||
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
|
||||
|
||||
with patch(
|
||||
"vllm._aiter_ops.inspect.signature",
|
||||
"inspect.signature",
|
||||
side_effect=AttributeError("No attribute"),
|
||||
):
|
||||
assert _check_aiter_mla_fp8_support() is False
|
||||
@@ -78,7 +78,7 @@ class TestAiterMlaFp8SupportCheck:
|
||||
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
|
||||
|
||||
with patch(
|
||||
"vllm._aiter_ops.inspect.signature",
|
||||
"inspect.signature",
|
||||
side_effect=ValueError("No signature"),
|
||||
):
|
||||
assert _check_aiter_mla_fp8_support() is False
|
||||
@@ -93,7 +93,7 @@ class TestAiterMlaFp8SupportCheck:
|
||||
aiter_ops._AITER_MLA_SUPPORTS_FP8 = None
|
||||
|
||||
with patch(
|
||||
"vllm._aiter_ops.inspect.signature",
|
||||
"inspect.signature",
|
||||
side_effect=TypeError("Not a callable"),
|
||||
):
|
||||
assert _check_aiter_mla_fp8_support() is False
|
||||
|
||||
Reference in New Issue
Block a user