[CI] Enable prefix caching in BFCL benchmark (#43925)

Signed-off-by: Yifan Zong <yzong@redhat.com>
This commit is contained in:
yzong-rh
2026-05-28 19:36:31 -04:00
committed by GitHub
parent 69c9f19957
commit 325a1ec4fb
@@ -70,7 +70,7 @@ echo "============================================"
# ---- Install bfcl-eval if missing ----
# Probe for the package by trying to import it with python3; stderr is
# discarded so a failed import does not print a traceback to the log.
if ! python3 -c "import bfcl_eval" 2>/dev/null; then
echo "Installing bfcl-eval..."
# The requirement accepts versions from 2025.10.20.1 up to, but excluding, 2026.
# NOTE(review): two installer invocations with the identical requirement
# appear back to back. This looks like a diff rendering with the +/- markers
# stripped (the old `pip install` line followed by its `uv pip install`
# replacement) — confirm that only one of them is meant to run.
pip install "bfcl-eval>=2025.10.20.1,<2026"
uv pip install "bfcl-eval>=2025.10.20.1,<2026"
fi
# ---- Cleanup handler ----
@@ -100,7 +100,7 @@ SERVE_ARGS=(
--tensor-parallel-size "$TP_SIZE"
--max-model-len "$MAX_MODEL_LEN"
--enforce-eager
--no-enable-prefix-caching
--enable-prefix-caching
)
# Append reasoning parser if specified