Mirror of https://github.com/vllm-project/vllm.git (synced 2026-06-06 00:16:14 +00:00)
[CI] Enable prefix caching in BFCL benchmark (#43925)
Signed-off-by: Yifan Zong <yzong@redhat.com>
This commit is contained in:
@@ -70,7 +70,7 @@ echo "============================================"
|
||||
# ---- Install bfcl-eval if missing ----
|
||||
if ! python3 -c "import bfcl_eval" 2>/dev/null; then
|
||||
echo "Installing bfcl-eval..."
|
||||
- pip install "bfcl-eval>=2025.10.20.1,<2026"
|
||||
+ uv pip install "bfcl-eval>=2025.10.20.1,<2026"
|
||||
fi
|
||||
|
||||
# ---- Cleanup handler ----
|
||||
@@ -100,7 +100,7 @@ SERVE_ARGS=(
|
||||
--tensor-parallel-size "$TP_SIZE"
|
||||
--max-model-len "$MAX_MODEL_LEN"
|
||||
--enforce-eager
|
||||
- --no-enable-prefix-caching
|
||||
+ --enable-prefix-caching
|
||||
)
|
||||
|
||||
# Append reasoning parser if specified
|
||||
|
||||
Reference in New Issue
Block a user