[Rust Frontend] Move code from vllm-frontend-rs (#43283)

Signed-off-by: Bugen Zhao <i@bugenzhao.com>
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Signed-off-by: Eric Curtin <eric.curtin@docker.com>
Signed-off-by: Dev-X25874 <283057883+Dev-X25874@users.noreply.github.com>
Signed-off-by: Will.hou <1205157517@qq.com>
Signed-off-by: Will.hou <willamhou@ceresman.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Eric Curtin <eric.curtin@docker.com>
Co-authored-by: Dev-X25874 <283057883+Dev-X25874@users.noreply.github.com>
Co-authored-by: Will.hou <1205157517@qq.com>
Co-authored-by: Will.hou <willamhou@ceresman.com>

Please see https://github.com/Inferact/vllm-frontend-rs for full original commit history.
This commit is contained in:
Bugen Zhao
2026-05-22 08:21:48 +08:00
committed by GitHub
parent 39d5fa96a7
commit 39910f2b25
284 changed files with 72569 additions and 90 deletions
-7
View File
@@ -223,13 +223,6 @@ echo "CACHE_FROM_MAIN: ${CACHE_FROM_MAIN}"
check_and_skip_if_image_exists
# The rust frontend lives in a git submodule under rust/. Buildkite's default
# checkout does not recurse submodules, and the Dockerfile only sees what's in
# the build context, so initialize the submodule here before invoking bake.
echo "--- :git: Initializing git submodules"
git submodule sync --recursive
git submodule update --init --recursive
echo "--- :docker: Setting up Docker buildx bake"
echo "Target: ${TARGET}"
echo "vLLM bake file: ${VLLM_BAKE_FILE_PATH}"
@@ -21,12 +21,6 @@ else
exit 0
fi
# The rust frontend lives in a git submodule under rust/. Buildkite's default
# checkout does not recurse submodules, and the Dockerfile only sees what's in
# the build context, so initialize the submodule here before building.
git submodule sync --recursive
git submodule update --init --recursive
# build
docker build --file docker/Dockerfile.cpu \
--build-arg max_jobs=16 \
@@ -21,12 +21,6 @@ else
exit 0
fi
# The rust frontend lives in a git submodule under rust/. Buildkite's default
# checkout does not recurse submodules, and the Dockerfile only sees what's in
# the build context, so initialize the submodule here before building.
git submodule sync --recursive
git submodule update --init --recursive
# build
docker build --file docker/Dockerfile.cpu \
--build-arg max_jobs=16 \
+156
View File
@@ -0,0 +1,156 @@
#!/usr/bin/env bash
# Cargo CI driver: validates the requested mode, moves to the repository
# root, and exports the cargo/rustup environment used by the functions below.
#
# Usage: <script> {style-clippy|test}
set -euo pipefail

MODE="${1:-}"
case "$MODE" in
  style-clippy | test)
    ;;
  *)
    # Anything else (including no argument at all) is a usage error.
    echo "Usage: $0 {style-clippy|test}" >&2
    exit 2
    ;;
esac

# All relative paths used later are resolved against the git top-level.
ROOT_DIR="$(git rev-parse --show-toplevel)"
cd "$ROOT_DIR"

# Keep caller-provided values; otherwise fall back to the defaults.
: "${CARGO_TERM_COLOR:=always}"
: "${CARGO_HOME:=$HOME/.cargo}"
: "${RUSTUP_HOME:=$HOME/.rustup}"
export CARGO_TERM_COLOR CARGO_HOME RUSTUP_HOME
export PATH="$CARGO_HOME/bin:$PATH"
# Print a section header line: "--- " followed by all arguments joined by
# spaces.
log_section() {
  local title="$*"
  echo "--- ${title}"
}
#######################################
# Install protoc (bin/protoc plus the bundled include/ tree) under
# $CARGO_HOME unless a protoc is already resolvable via `command -v`.
# Globals:   CARGO_HOME (read), PROTOC_VERSION (read; defaults to 31.1)
# Outputs:   a section header on stdout; errors on stderr
# Returns:   0 when protoc is already present or was installed; non-zero on
#            an unsupported CPU architecture or a failed download/extract.
#######################################
install_protoc() {
  if command -v protoc >/dev/null 2>&1; then
    return
  fi

  local version="${PROTOC_VERSION:-31.1}"
  local arch
  # Map `uname -m` onto the architecture suffix used in the release file name.
  case "$(uname -m)" in
    x86_64)
      arch="x86_64"
      ;;
    aarch64 | arm64)
      arch="aarch_64"
      ;;
    *)
      echo "Unsupported protoc architecture: $(uname -m)" >&2
      return 1
      ;;
  esac

  local url="https://github.com/protocolbuffers/protobuf/releases/download/v${version}/protoc-${version}-linux-${arch}.zip"
  local tmp_dir
  tmp_dir="$(mktemp -d)"

  log_section "Installing protoc ${version}"

  # Run the install steps as one guarded chain so the temporary directory is
  # removed on failure too, instead of being leaked when `set -e` aborts.
  # `unzip -o` overwrites without prompting, so a partially populated
  # $CARGO_HOME (e.g. a leftover include/ tree) cannot stall the job.
  local status=0
  {
    curl -L --proto '=https' --tlsv1.2 -sSf "$url" -o "$tmp_dir/protoc.zip" &&
      mkdir -p "$CARGO_HOME/bin" &&
      unzip -q -o "$tmp_dir/protoc.zip" bin/protoc 'include/*' -d "$CARGO_HOME" &&
      chmod +x "$CARGO_HOME/bin/protoc"
  } || status=$?

  rm -rf -- "$tmp_dir"
  return "$status"
}
# Print the toolchain channel from rust-toolchain.toml in the current
# directory: the text between the first pair of double quotes on the first
# line matching `channel =`. Prints nothing when no such line exists.
rust_toolchain() {
  local toolchain_file="rust-toolchain.toml"
  awk 'BEGIN { FS = "\"" } /channel[[:space:]]*=/ { print $2; exit }' "$toolchain_file"
}
# Ensure rustup exists, then install the toolchain reported by rust_toolchain
# together with the rustfmt and clippy components.
install_rust_toolchain() {
  log_section "Installing Rust toolchain"

  # Bootstrap rustup only when it is missing; no default toolchain is
  # installed at this point, the required one is added explicitly below.
  if ! command -v rustup >/dev/null 2>&1; then
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
      | sh -s -- -y --profile minimal --default-toolchain none
  fi

  local channel
  channel="$(rust_toolchain)"
  local -a components=(rustfmt clippy)

  rustup toolchain install "$channel" --profile minimal --component rustfmt,clippy
  # Also add the components explicitly, which covers a toolchain that was
  # already installed without them.
  rustup component add --toolchain "$channel" "${components[@]}"
}
# Install cargo-binstall by piping its release install script into bash,
# unless cargo-binstall is already resolvable via `command -v`.
install_cargo_binstall() {
  if ! command -v cargo-binstall >/dev/null 2>&1; then
    log_section "Installing cargo-binstall"
    curl -L --proto '=https' --tlsv1.2 -sSf \
      https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh \
      | bash
  fi
}
# Install cargo-sort through cargo-binstall unless it is already resolvable
# via `command -v`.
install_cargo_sort() {
  if ! command -v cargo-sort >/dev/null 2>&1; then
    log_section "Installing cargo-sort"
    # cargo-binstall is itself installed on demand.
    install_cargo_binstall
    cargo binstall --no-confirm cargo-sort
  fi
}
# Install cargo-nextest through cargo-binstall (with --secure) unless it is
# already resolvable via `command -v`.
install_cargo_nextest() {
  if ! command -v cargo-nextest >/dev/null 2>&1; then
    log_section "Installing cargo-nextest"
    # cargo-binstall is itself installed on demand.
    install_cargo_binstall
    cargo binstall --no-confirm --secure cargo-nextest
  fi
}
# Install uv via its install script unless uv is already resolvable via
# `command -v`. UV_INSTALL_DIR is set to $CARGO_HOME/bin for the installer.
install_uv() {
  if ! command -v uv >/dev/null 2>&1; then
    log_section "Installing uv"
    curl -LsSf --proto '=https' --tlsv1.2 https://astral.sh/uv/install.sh \
      | env UV_INSTALL_DIR="$CARGO_HOME/bin" sh
  fi
}
# Style mode: check rustfmt formatting, Cargo.toml ordering (cargo-sort) and
# clippy lints for the workspace at rust/Cargo.toml. Clippy warnings are
# denied via `-D warnings`.
run_style_clippy() {
  local manifest="rust/Cargo.toml"

  install_cargo_sort

  log_section "Checking Rust formatting"
  cargo fmt --manifest-path "$manifest" --all -- --check

  log_section "Checking Cargo.toml ordering"
  cargo sort --workspace --check rust

  log_section "Running clippy"
  local -a clippy_args=(
    --manifest-path "$manifest"
    --workspace
    --all-targets
    --all-features
    --locked
    --
    -D warnings
  )
  cargo clippy "${clippy_args[@]}"
}
# Test mode: make sure uv and cargo-nextest are available, then run the
# workspace tests at rust/Cargo.toml with cargo-nextest. --no-fail-fast is
# passed so the run continues past the first failing test.
run_tests() {
  install_uv
  install_cargo_nextest

  log_section "Running cargo nextest"
  local -a nextest_args=(
    --manifest-path rust/Cargo.toml
    --workspace
    --all-features
    --locked
    --no-fail-fast
  )
  cargo nextest run "${nextest_args[@]}"
}
# Common prerequisites, installed regardless of the selected mode.
install_protoc
install_rust_toolchain

# MODE was validated at the top of the script, so exactly one branch runs.
if [[ "$MODE" == "style-clippy" ]]; then
  run_style_clippy
elif [[ "$MODE" == "test" ]]; then
  run_tests
fi
+1 -1
View File
@@ -1,4 +1,4 @@
group: Rust Frontend
group: Rust Frontend E2E
depends_on:
- image-build
steps:
@@ -0,0 +1,30 @@
group: Rust Frontend Cargo
depends_on: []
steps:
- label: Rust Frontend Cargo Style + Clippy
key: rust-frontend-cargo-style-clippy
depends_on: []
timeout_in_minutes: 30
device: cpu-medium
no_plugin: true
source_file_dependencies:
- rust/
- rust-toolchain.toml
- .buildkite/test_areas/rust_frontend_cargo.yaml
- .buildkite/scripts/run-rust-frontend-cargo-ci.sh
commands:
- .buildkite/scripts/run-rust-frontend-cargo-ci.sh style-clippy
- label: Rust Frontend Cargo Tests
key: rust-frontend-cargo-tests
depends_on: []
timeout_in_minutes: 30
device: cpu-medium
no_plugin: true
source_file_dependencies:
- rust/
- rust-toolchain.toml
- .buildkite/test_areas/rust_frontend_cargo.yaml
- .buildkite/scripts/run-rust-frontend-cargo-ci.sh
commands:
- .buildkite/scripts/run-rust-frontend-cargo-ci.sh test
-3
View File
@@ -1,3 +0,0 @@
[submodule "rust"]
path = rust
url = https://github.com/Inferact/vllm-frontend-rs.git
+26
View File
@@ -256,6 +256,32 @@ repos:
entry: python tools/pre_commit/check_boolean_context_manager.py
language: python
types: [python]
# Rust hooks. These shell out to `cargo`; tools/pre_commit/rust-check.sh
# skips with a warning when cargo is not installed.
- id: rust-cargo-autoinherit
name: Rust - Normalize Cargo manifests with autoinherit
entry: tools/pre_commit/rust-check.sh autoinherit --prefer-simple-dotted
language: script
pass_filenames: false
require_serial: true
stages: [pre-commit] # Only run locally as Buildkite will cover this
files: ^rust/(Cargo\.toml|src/.*/Cargo\.toml)$
- id: rust-cargo-sort
name: Rust - Sort Cargo manifest sections
entry: tools/pre_commit/rust-check.sh sort --workspace
language: script
pass_filenames: false
require_serial: true
stages: [pre-commit] # Only run locally as Buildkite will cover this
files: ^rust/(Cargo\.toml|src/.*/Cargo\.toml)$
- id: rust-cargo-fmt
name: Rust - Format code
entry: tools/pre_commit/rust-check.sh fmt
language: script
pass_filenames: false
require_serial: true
stages: [pre-commit] # Only run locally as Buildkite will cover this
files: ^rust/.*(\.rs|Cargo\.toml|rustfmt\.toml)$
# Keep `suggestion` last
- id: suggestion
name: Suggestion
+3 -2
View File
@@ -9,10 +9,10 @@ set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")" && pwd)"
RUST_DIR="$REPO_ROOT/rust"
TARGET_PATH="$REPO_ROOT/vllm/vllm-rs"
TARGET_PATH="${VLLM_RS_TARGET_PATH:-$REPO_ROOT/vllm/vllm-rs}"
# Read the required toolchain from rust-toolchain.toml.
TOOLCHAIN=$(grep '^channel' "$RUST_DIR/rust-toolchain.toml" | sed 's/.*= *"\(.*\)"/\1/')
TOOLCHAIN=$(grep '^channel' "$REPO_ROOT/rust-toolchain.toml" | sed 's/.*= *"\(.*\)"/\1/')
# Ensure rustup and the required toolchain are available.
if ! command -v rustup &>/dev/null; then
@@ -39,5 +39,6 @@ cargo +"$TOOLCHAIN" build "${PROFILE_ARGS[@]}" \
--bin vllm-rs \
--features native-tls-vendored
mkdir -p "$(dirname "$TARGET_PATH")"
cp "$RUST_DIR/target/$PROFILE_DIR/vllm-rs" "$TARGET_PATH"
echo "Installed vllm-rs to $TARGET_PATH"
+4 -12
View File
@@ -256,7 +256,7 @@ RUN if [ "${BUILD_OS}" = "manylinux" ]; then \
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
# Install rustup; the toolchain itself is pinned by rust/rust-toolchain.toml.
# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
sh -s -- -y --profile minimal --default-toolchain none
ENV PATH="/root/.cargo/bin:${PATH}"
@@ -265,14 +265,8 @@ WORKDIR /workspace
# Copy only the rust workspace — the binary is the sole artifact we need.
COPY rust rust
# Fail loudly if the rust submodule was not initialized on the host before
# `docker build`. Without this check, cargo would emit a confusing error.
RUN if [ ! -f rust/Cargo.toml ]; then \
echo "ERROR: rust/ submodule is not initialized."; \
echo "Run 'git submodule update --init --recursive' on the host before building."; \
exit 1; \
fi
COPY rust-toolchain.toml rust-toolchain.toml
COPY build_rust.sh build_rust.sh
# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
@@ -284,9 +278,7 @@ ENV CARGO_BUILD_JOBS=4
RUN --mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/workspace/rust/target \
cd rust \
&& cargo build --release --bin vllm-rs --features native-tls-vendored \
&& cp target/release/vllm-rs /workspace/vllm-rs
VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
#################### RUST BUILD IMAGE ####################
#################### CSRC BUILD IMAGE ####################
+4 -12
View File
@@ -96,7 +96,7 @@ RUN apt-get update -y \
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
# Install rustup; the toolchain itself is pinned by rust/rust-toolchain.toml.
# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
sh -s -- -y --profile minimal --default-toolchain none
ENV PATH="/root/.cargo/bin:${PATH}"
@@ -105,14 +105,8 @@ WORKDIR /workspace
# Copy only the rust workspace — the binary is the sole artifact we need.
COPY rust rust
# Fail loudly if the rust submodule was not initialized on the host before
# `docker build`. Without this check, cargo would emit a confusing error.
RUN if [ ! -f rust/Cargo.toml ]; then \
echo "ERROR: rust/ submodule is not initialized."; \
echo "Run 'git submodule update --init --recursive' on the host before building."; \
exit 1; \
fi
COPY rust-toolchain.toml rust-toolchain.toml
COPY build_rust.sh build_rust.sh
# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
@@ -124,9 +118,7 @@ ENV CARGO_BUILD_JOBS=4
RUN --mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/workspace/rust/target \
cd rust \
&& cargo build --release --bin vllm-rs --features native-tls-vendored \
&& cp target/release/vllm-rs /workspace/vllm-rs
VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
######################### BUILD IMAGE #########################
FROM base AS vllm-build
+4 -12
View File
@@ -108,7 +108,7 @@ RUN apt-get update -y \
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
# Install rustup; the toolchain itself is pinned by rust/rust-toolchain.toml.
# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
sh -s -- -y --profile minimal --default-toolchain none
ENV PATH="/root/.cargo/bin:${PATH}"
@@ -116,14 +116,8 @@ ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /workspace
COPY rust rust
# Fail loudly if the rust submodule was not initialized on the host before
# `docker build`.
RUN if [ ! -f rust/Cargo.toml ]; then \
echo "ERROR: rust/ submodule is not initialized."; \
echo "Run 'git submodule update --init --recursive' on the host before building."; \
exit 1; \
fi
COPY rust-toolchain.toml rust-toolchain.toml
COPY build_rust.sh build_rust.sh
# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
@@ -132,9 +126,7 @@ ENV CARGO_BUILD_JOBS=4
RUN --mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/workspace/rust/target \
cd rust \
&& cargo build --release --bin vllm-rs --features native-tls-vendored \
&& cp target/release/vllm-rs /workspace/vllm-rs
VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
#################### RUST BUILD IMAGE ####################
#################### WHEEL BUILD IMAGE ####################
+4 -15
View File
@@ -106,7 +106,6 @@ ONBUILD RUN git clone ${VLLM_REPO} \
&& cd vllm \
&& git fetch -v --prune -- origin ${VLLM_BRANCH} \
&& git checkout FETCH_HEAD \
&& git submodule update --init --recursive \
&& if [ ${VLLM_REPO} != "https://github.com/vllm-project/vllm.git" ] ; then \
git remote add upstream "https://github.com/vllm-project/vllm.git" \
&& git fetch upstream ; fi
@@ -120,16 +119,7 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
FROM fetch_vllm AS rust-build
ARG COMMON_WORKDIR
# Fail loudly if the rust submodule was not initialized on the host before
# `docker build`. The rust frontend source is brought in via the fetch_vllm
# stage, so an uninitialized submodule would otherwise produce a confusing
# cargo failure.
RUN if [ ! -f ${COMMON_WORKDIR}/vllm/rust/Cargo.toml ]; then \
echo "ERROR: rust/ submodule is not initialized."; \
echo "Run 'git submodule update --init --recursive' on the host before building."; \
exit 1; \
fi
# protoc is used by tonic-build/prost-build.
RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
ca-certificates curl unzip \
&& rm -rf /var/lib/apt/lists/*
@@ -137,7 +127,7 @@ RUN apt-get update -q -y && apt-get install -q -y --no-install-recommends \
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
# Install rustup; the toolchain itself is pinned by rust/rust-toolchain.toml.
# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
sh -s -- -y --profile minimal --default-toolchain none
ENV PATH="/root/.cargo/bin:${PATH}"
@@ -150,9 +140,8 @@ ENV CARGO_BUILD_JOBS=4
# so it persists into the image layer for later COPY --from=rust-build.
RUN --mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
cd ${COMMON_WORKDIR}/vllm/rust \
&& cargo build --release --bin vllm-rs --features native-tls-vendored \
&& cp target/release/vllm-rs /tmp/vllm-rs
cd ${COMMON_WORKDIR}/vllm \
&& VLLM_RS_TARGET_PATH=/tmp/vllm-rs bash build_rust.sh
# -----------------------
# vLLM build stages
+4 -12
View File
@@ -12,7 +12,7 @@ RUN apt-get update -y \
COPY tools/install_protoc.sh /tmp/install_protoc.sh
RUN /tmp/install_protoc.sh && rm /tmp/install_protoc.sh
# Install rustup; the toolchain itself is pinned by rust/rust-toolchain.toml.
# Install rustup; the toolchain itself is pinned by rust-toolchain.toml.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \
sh -s -- -y --profile minimal --default-toolchain none
ENV PATH="/root/.cargo/bin:${PATH}"
@@ -20,14 +20,8 @@ ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /workspace
COPY rust rust
# Fail loudly if the rust submodule was not initialized on the host before
# `docker build`.
RUN if [ ! -f rust/Cargo.toml ]; then \
echo "ERROR: rust/ submodule is not initialized."; \
echo "Run 'git submodule update --init --recursive' on the host before building."; \
exit 1; \
fi
COPY rust-toolchain.toml rust-toolchain.toml
COPY build_rust.sh build_rust.sh
# Cap cargo parallelism to avoid exhausting the CI host's open-file limit
# (rustc spawns enough concurrent processes to hit RLIMIT_NOFILE otherwise).
@@ -36,9 +30,7 @@ ENV CARGO_BUILD_JOBS=4
RUN --mount=type=cache,target=/root/.cargo/registry \
--mount=type=cache,target=/root/.cargo/git \
--mount=type=cache,target=/workspace/rust/target \
cd rust \
&& cargo build --release --bin vllm-rs --features native-tls-vendored \
&& cp target/release/vllm-rs /workspace/vllm-rs
VLLM_RS_TARGET_PATH=/workspace/vllm-rs bash build_rust.sh
FROM intel/deep-learning-essentials:2025.3.2-0-devel-ubuntu24.04 AS vllm-base
+4 -1
View File
@@ -127,7 +127,10 @@ extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*", "tests/tokenizer
"vllm/third_party/*", "vllm/entrypoints/serve/instrumentator/static/*",
"tests/entrypoints/speech_to_text/transcription/test_transcription_validation.py",
"docs/governance/process.md", "docs/assets/contributing/vllm_bench_serve_timeline.html",
"tests/v1/engine/test_fast_incdec_prefix_err.py", ".git/*", "csrc/cpu/sgl-kernels/*"]
"tests/v1/engine/test_fast_incdec_prefix_err.py", ".git/*", "csrc/cpu/sgl-kernels/*",
"rust/src/chat/src/renderer/deepseek_v32/fixtures/*",
"rust/src/tool-parser/src/gemma4.rs", "rust/src/text/src/output/decoded.rs",
"rust/src/tokenizer/src/incremental.rs", "rust/src/reasoning-parser/src/tests.rs"]
ignore-hidden = false
[tool.typos.default]
+1
View File
@@ -5,6 +5,7 @@ ray>=2.9
cmake>=3.26.1
packaging>=24.2
setuptools-scm>=8
setuptools-rust>=1.9.0
setuptools>=77.0.3,<81.0.0
setuptools-rust>=1.9.0
wheel
Submodule rust deleted from ad6771ac09
+2
View File
@@ -0,0 +1,2 @@
src/chat/src/renderer/deepseek_v32/fixtures/** linguist-generated=true
src/chat/tests/templates/** linguist-vendored=true
+3
View File
@@ -0,0 +1,3 @@
/target
AGENTS.override.md
.vscode
+37
View File
@@ -0,0 +1,37 @@
# Alternative Frontend to vLLM Engine in Rust
This project aims to implement an alternative frontend to the vLLM Engine in Rust, providing a more efficient and robust interface for interacting with the engine. It is currently at a very early stage and is actively evolving.
## Coding Styles
- Always use workspace dependencies for Cargo crates.
- Prefer splitting code into multiple smaller modules and files for better organization and readability, rather than putting everything in a single file.
- When refactoring or reconstructing code, always preserve the original comments and documentation VERBATIM, if applicable.
- If not specified, default to writing concise Rust documentation and comments that match the style of the existing codebase when generating code.
- When migrating code from Python or any other language, preserve the original documentation comments whenever they still make sense in the Rust code.
- Although you might be asked to implement or migrate only minimal functionality at the beginning, you should still leave the necessary `TODO` comments in the code for features that are still missing, so that it's easier for the next iteration to build upon the existing codebase.
- When writing parsers with `winnow`:
- Prefer a declarative parser shape over imperative step-by-step parsing, as long as it's more readable and maintainable.
- Prefer tuple-based parser composition over calling `parse_next` one parser at a time.
- Prefer built-in combinators and token parsers before adding local helpers.
- Add short documentation comments like `Parse a ..` to all local parser/combinator functions.
- Reuse existing utilities from `utils` module as much as possible, and add new ones there if needed.
- Rust error handling:
- Never call `to_string()` directly on an error value.
- Use `ToReportString` or `AsReport` by `thiserror-ext` instead.
- For `Error` variants that are primarily free-form text, prefer a struct variant with a `message: String` field. `thiserror_ext::Macro` will auto-derive `foo!(...)` and `bail_foo!(...)` helper macros from that shape.
- Use `foo!(...)` when you need to construct an error value inside an expression, such as `Err(foo!(...))`, `.ok_or_else(|| foo!(...))`, or `Err::<(), _>(foo!(...))?`.
- Use `bail_foo!(...)` only in statement positions where you want to exit the current `Result`-returning function immediately. Prefer it over `return Err(foo!(...))` in those cases.
- If a variant has extra structured fields, prefer the generated macro form `foo!(field = value, "message")` rather than manually writing `Error::Foo { ... }`.
- Since the project is still at an early stage, it's fine to break APIs and make non-backwards-compatible changes as needed.
- Currently the project only targets Unix-like platforms, so it's fine to use Unix-specific APIs without extra compatibility layers like `cfg(unix)`.
## Testing
- Prefer snapshot testing with the `expect-test` crate over writing multiple `assert_eq!` statements on individual fields. Use `expect_test::expect![[...]].assert_debug_eq(...)` to snapshot the `Debug` output of the entire struct.
- Write `expect![[""]]` as a placeholder first, then run `UPDATE_EXPECT=1 cargo test` to auto-fill the snapshot content.
- For values containing non-deterministic data (e.g., UUIDs), set them to a fixed value like `"<placeholder>"` before snapshotting.
- In tests, avoid hand-writing full request struct literals when only a few fields matter. Prefer test fixtures such as `for_test()` with struct update syntax, so newly added fields do not force mechanical edits across many tests.
- Prefer deterministic synchronization in async and integration tests, such as channels, barriers, explicit handshakes, or observable state transitions, instead of `sleep`-based timing assumptions.
- Use `sleep` only as a last resort when there is no better observable synchronization point.
- Always run tests with `cargo nextest run` instead of `cargo test`, if available, as it's much faster.
+4
View File
@@ -0,0 +1,4 @@
# CLAUDE.md
First, check @AGENTS.override.md if it exists.
Then, follow instructions in @AGENTS.md.
+6611
View File
File diff suppressed because it is too large Load Diff
+129
View File
@@ -0,0 +1,129 @@
[workspace]
members = [
"src/chat",
"src/cmd",
"src/engine-core-client",
"src/llm",
"src/managed-engine",
"src/metrics",
"src/reasoning-parser",
"src/server",
"src/text",
"src/tokenizer",
"src/tool-parser",
]
resolver = "3"
[workspace.package]
version = "0.1.0"
edition = "2024"
license = "Apache-2.0"
[workspace.dependencies]
anyhow = "1.0.100"
arc-swap = "1.9.0"
async-openai = "0.33.1"
async-trait = "0.1.89"
asynk-strim-attr = "0.1.0"
axum = "0.8.8"
base64 = "0.22.1"
bytemuck = { version = "1.25.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
bytes = "1.11.1"
clap = { version = "4.5.38", features = ["derive", "env"] }
criterion = "0.5.1"
easy-ext = "1.0.3"
educe = "0.6.0"
enum-as-inner = "0.7.0"
expect-test = "1.5.1"
fastokens = "0.2.0"
futures = "0.3.31"
half = { version = "2.7.1", features = ["bytemuck"] }
hex = "0.4.3"
hf-hub = { version = "0.5.0", features = ["tokio"] }
http-body = "1.0.1"
itertools = "0.14.0"
libc = "0.2.177"
llm-multimodal = { git = "https://github.com/vllm-project/llm-multimodal", rev = "5b558989844d1c7af3e43d0f604069ffd9c06320" }
minijinja = { version = "2.0", features = ["unstable_machinery", "json", "builtins", "loader", "loop_controls"] }
minijinja-contrib = { version = "2.0", features = ["pycompat"] }
native-tls-vendored = { package = "native-tls", version = "0.2.18", features = ["vendored"] }
ndarray = { version = "0.16.1", features = ["serde"] }
openai-harmony = "0.0.8"
openai-protocol = "1.6.0"
parking_lot = "0.12.5"
prometheus-client = "0.24.0"
prometheus-client-derive-encode = "0.5.0"
prost = "0.14.3"
prost-types = "0.14.3"
reasoning-parser = "1.2.2"
reqwest = { version = "0.12.8", default-features = false, features = ["rustls-tls"] }
riptoken = { version = "0.3.0", default-features = false }
rmp-serde = "1.3.1"
rmpv = { version = "1.3.1", features = ["with-serde"] }
rustc-hash = "1.1.0"
serde = { version = "1.0.228", features = ["derive"] }
serde-json-fmt = "0.1.0"
serde_default = "0.2.0"
serde_json = "1.0.145"
serde_repr = "0.1.20"
serde_tuple = "1.1.3"
serde_with = "3.18.0"
serial_test = "3.2.0"
socket2 = "0.6.3"
subenum = "1.1.3"
task-local = "0.1.1"
tekken = { package = "tekken-rs", version = "0.1.1", default-features = false }
tempfile = "3.23.0"
thiserror = "2.0.16"
thiserror-ext = "0.3.0"
tiktoken-rs = "0.9.1"
time = { version = "0.3.47", features = ["formatting", "local-offset", "macros"] }
tokenizers = "0.22.0"
tokio = { version = "1.47.1", features = [
"macros",
"net",
"rt-multi-thread",
"sync",
"time",
] }
tokio-stream = "0.1"
tokio-util = { version = "0.7.18", features = ["rt"] }
tonic = "0.14.5"
tonic-build = "0.14.5"
tonic-prost = "0.14.5"
tonic-prost-build = "0.14.5"
tool-parser = "1.2.0"
tower = { version = "0.5.3", features = ["util"] }
tower-http = { version = "0.6.8", features = ["trace"] }
tracing = { version = "0.1.44", features = ["release_max_level_debug"] }
tracing-futures = { version = "0.2.5", features = ["futures-03"] }
tracing-subscriber = { version = "0.3.20", features = ["env-filter", "fmt"] }
trait-set = "0.3.0"
uuid = { version = "1.22.0", features = ["v4"] }
validator = { version = "0.20.0", features = ["derive"] }
vllm-chat = { path = "src/chat" }
vllm-engine-core-client = { path = "src/engine-core-client" }
vllm-llm = { path = "src/llm" }
vllm-managed-engine = { path = "src/managed-engine" }
vllm-metrics = { path = "src/metrics" }
vllm-reasoning-parser = { path = "src/reasoning-parser" }
vllm-server = { path = "src/server" }
vllm-text = { path = "src/text" }
vllm-tokenizer = { path = "src/tokenizer" }
vllm-tool-parser = { path = "src/tool-parser" }
winnow = "1.0.2"
zeromq = { version = "0.6.0", default-features = false, features = [
"tokio-runtime",
"all-transport",
] }
[workspace.lints.clippy]
too_many_arguments = "allow"
[profile.dev]
panic = "abort"
[profile.release]
lto = "thin"
panic = "abort"
+89
View File
@@ -0,0 +1,89 @@
# vllm-frontend-rs
This is a Rust drop-in alternative frontend for vLLM. The current goal is to rebuild the northbound serving layer in Rust while still talking to the core Python vLLM engine process(es) via ZMQ over the existing engine boundary.
It should still be considered experimental and is not feature-complete. We are working to add more functionality from the Python frontend.
See <https://github.com/Inferact/vllm-frontend-rs> for the original commit history before it was moved into the main vllm repo.
## Architecture
The component is organized as a Cargo workspace with several crates, layered bottom-up:
```text
┌─────────────────────────────────┐
│ vllm-cmd / vllm-rs │ CLI entrypoint:
│ │ Python vLLM frontend subprocess
│ │ Rust managed-engine serve mode
├─────────────────────────────────┤
│ vllm-server │ OpenAI-compatible HTTP API (axum)
├─────────────────────────────────┤
│ vllm-chat │ Chat completions: template rendering,
│ │ structured assistant events,
│ │ reasoning & tool parsing
├─────────────────────────────────┤
│ vllm-text │ Tokenizer & incremental detokenizer
├─────────────────────────────────┤
│ vllm-llm │ Thin token-in/token-out facade over
│ │ the engine client
├─────────────────────────────────┤
│ vllm-engine-core-client │ ZMQ transport + MessagePack protocol
│ │ for the headless vLLM engine
└─────────────────────────────────┘
```
`vllm-rs` integrates into Python `vllm` as a Rust frontend subprocess.
Python owns process startup and launches the Rust API server as a Python-supervised worker, while
passing the inherited listening socket and transport addresses into `vllm-rs`.
For example:
```bash
VLLM_USE_RUST_FRONTEND=1 vllm serve Qwen/Qwen3-0.6B
```
### External Engine
`vllm-rs serve` can be run standalone with `--data-parallel-size-local 0` when the Python engines
are started elsewhere and this node should run only the Rust frontend. The frontend still uses
the global `--data-parallel-size` to determine how many engines it expects to join the shared handshake.
```bash
vllm serve Qwen/Qwen3-0.6B \
--headless \
--data-parallel-address 127.0.0.1 \
--data-parallel-rpc-port 62100 \
--data-parallel-size 1 \
--data-parallel-size-local 1
```
Then start the Rust frontend-only server:
```bash
vllm-rs serve Qwen/Qwen3-0.6B \
--data-parallel-address 127.0.0.1 \
--data-parallel-rpc-port 62100 \
--data-parallel-size 1 \
--data-parallel-size-local 0
```
To build `vllm-rs` in isolation:
```bash
# from the local checkout
cargo install --path src/cmd --bin vllm-rs
```
### Example Request
After either startup path, you can use any OpenAI-compatible client:
```bash
curl http://127.0.0.1:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen3-0.6B",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"stream": true
}'
```
+196
View File
@@ -0,0 +1,196 @@
syntax = "proto3";
package vllm;
import "google/protobuf/struct.proto";
// Text generation service. Both RPCs take a GenerateRequest; Generate returns
// a single GenerateResponse, GenerateStream returns a stream of them.
service Generate {
// Generates text given a prompt
rpc Generate (GenerateRequest) returns (GenerateResponse) {}
// Generates text given a prompt, streaming the outputs
rpc GenerateStream (GenerateRequest) returns (stream GenerateResponse) {}
}
// ======================================================================================
// Generate Request
// ======================================================================================
// Request message for the Generate and GenerateStream RPCs.
message GenerateRequest {
string request_id = 1;
string model = 2;
// The prompt is given either as text or as token ids (exactly one of the two).
oneof prompt {
string text = 3;
TokenIds token_ids = 4;
}
// Temperature, defaults to model-specific default or 0
optional float temperature = 5;
// Parameters controlling random sampling, not applicable if temperature == 0
RandomSampling sampling = 6;
// Parameters for conditionally penalizing/boosting
// candidate tokens during decoding
DecodingParameters decoding = 7;
// Parameters controlling when generation should stop
StoppingCriteria stopping = 8;
// Flags to control what is returned in the response
ResponseOptions response = 9;
// Parameters controlling KV cache/distribution
KVCacheParameters kv = 10;
// Truncate prompt tokens; default (0) means no truncation
uint32 truncate_prompt_tokens = 11;
// NOTE(review): the ordering semantics of priority are not documented here
// (whether higher or lower values run first) -- confirm against the server.
int32 priority = 12;
}
// Parameters controlling random sampling (GenerateRequest.sampling); not
// applicable if temperature == 0. A zero value in a non-optional field
// selects the default, as noted per field.
message RandomSampling {
uint32 num_sequences = 1; // "n", default (0) means 1
uint32 top_k = 2; // 0 means default
float top_p = 3; // 0 means default
float min_p = 4; // 0 means default
// Declared optional, so an unset seed is distinguishable from seed 0.
optional int64 seed = 5;
}
// Parameters for conditionally penalizing/boosting candidate tokens during
// decoding (GenerateRequest.decoding), plus an optional structured-output
// constraint.
message DecodingParameters {
// Penalties
float presence_penalty = 1; // Default (0.0) means no penalty
float frequency_penalty = 2; // Default (0.0) means no penalty
float repetition_penalty = 3; // Default (0.0) means no penalty
// Maps a token id to a bias value.
map<uint32, float> logit_bias = 4;
repeated uint32 allowed_token_ids = 5;
// Wrapper message so that a list of strings can be a oneof member.
message StringChoices {
repeated string choices = 1;
}
// Control structured outputs
// At most one of these constraint kinds can be set per request.
oneof structured_output {
string json = 6;
string regex = 7;
StringChoices choice = 8;
string grammar = 9;
bool json_object = 10;
string structural_tag = 11;
}
}
// Parameters controlling when generation should stop
// (GenerateRequest.stopping).
message StoppingCriteria {
// Default (0) is currently 20
uint32 max_new_tokens = 1;
// Default (0) means no minimum
uint32 min_new_tokens = 2;
repeated uint32 stop_token_ids = 3;
repeated string stop_strings = 4;
// NOTE(review): presumably controls whether a matched stop string is kept
// in the returned text -- confirm against the server implementation.
bool include_stop_strings = 5;
bool ignore_eos = 6;
}
// Flags to control what is returned in the response
// (GenerateRequest.response).
message ResponseOptions {
// Prompt options
bool prompt_token_ids = 1;
bool prompt_logprobs = 2;
optional CandidateTokens prompt_candidates = 3;
// Output options; output_text defaults to true
// (output_text is declared optional so that "unset" can be told apart from
// an explicit false.)
optional bool output_text = 4;
bool output_token_ids = 5;
bool output_logprobs = 6;
optional CandidateTokens output_candidates = 7;
}
// KV cache related options for one request.
message KVCacheParameters {
// Presumably skips prefix-cache lookup for this request when true - confirm
bool bypass_prefix_cache = 1;
// NOTE(review): salt semantics are not described in this file - confirm
string cache_salt = 2;
// KV Connector transfer parameters
// (free-form structure; its schema is not defined in this file)
google.protobuf.Struct kv_transfer_params = 3;
}
// Controls which extra candidate tokens at each position should be returned
message CandidateTokens {
// At most one selection mode is set
oneof select {
// Presumably the n highest-ranked candidates - confirm
uint32 top_n = 1;
// An explicit list of token ids
TokenIds token_ids = 2;
// Presumably every token in the vocabulary when true - confirm
bool all = 3;
}
}
// ======================================================================================
// Generate Response
// ======================================================================================
// One response message for a generate request.
message GenerateResponse {
// Only present in first response
optional PromptInfo prompt_info = 1;
// A single SequenceOutput per message: the field is not repeated despite
// its plural name
SequenceOutput outputs = 2;
}
// Output chunk for one generated sequence.
message SequenceOutput {
// Index of output sequence for num_sequences > 1.
uint32 index = 1;
// Text of this chunk
string text = 2;
uint32 num_tokens = 3; // Number of tokens in this chunk
repeated uint32 token_ids = 4; // If requested
repeated float logprobs = 5; // If requested
repeated uint32 ranks = 6; // If logprobs were requested
repeated CandidateTokenInfo candidate_tokens = 7; // If requested
// Only present in final output for this sequence
optional FinishInfo finish_info = 8;
}
// Prompt info, returned in the first response
message PromptInfo {
// Number of tokens in the prompt
uint32 num_prompt_tokens = 1;
repeated uint32 token_ids = 2; // If requested
repeated float logprobs = 3; // If requested
repeated uint32 ranks = 4; // If logprobs were requested
// Presumably only populated if requested, like the fields above - confirm
repeated CandidateTokenInfo candidate_tokens = 5;
}
// Finish info, returned in the final response
message FinishInfo {
// Number of output tokens
uint32 num_output_tokens = 1;
// Why generation ended. NOT_FINISHED is the zero value and is therefore
// what an unset finish_reason decodes to.
enum FinishReason {
NOT_FINISHED = 0; // Possibly more tokens to be streamed
LENGTH = 1; // Finished due to length constraint
STOP = 2; // Stop string/token or EOS encountered
ABORTED = 3; // Request aborted/cancelled
}
FinishReason finish_reason = 2;
// One of these will be set when finish_reason == STOP
oneof stop_reason {
uint32 stop_token_id = 3;
uint32 eos_token_id = 4;
string stop_string = 5;
}
// Free-form structure; its schema is not defined in this file
google.protobuf.Struct kv_transfer_params = 6;
// Field number 7 is pencilled in for a seed but is currently disabled
//uint64 seed = 7;
}
// Info for candidate tokens other than the input/sampled
// token at a given position
message CandidateTokenInfo {
// One candidate token: its id, log-probability and rank
message TokenInfo {
uint32 id = 1;
float logprob = 2;
uint32 rank = 3;
// Field numbers 4 and 5 are pencilled in below but currently disabled
// string text = 4;
// bytes token_bytes = 5;
}
// Candidate token infos at this position
repeated TokenInfo tokens = 1;
}
// Token ids used for prompt
// (also referenced by CandidateTokens.token_ids; wrapping the list in a
// message lets it be used as a oneof member)
message TokenIds {
repeated uint32 ids = 1;
}
+3
View File
@@ -0,0 +1,3 @@
style_edition = "2024"
chain_width = 80
use_field_init_shorthand = true
+20
View File
@@ -0,0 +1,20 @@
# Optional local formatting profile. CI and pre-commit use rustfmt.toml.
# Apply manually with:
# cargo +nightly fmt -- --config-path rustfmt.unstable.toml
style_edition = "2024"
chain_width = 80
comment_width = 100
use_field_init_shorthand = true
# Unstable features go here.
unstable_features = true
format_code_in_doc_comments = true
format_macro_matchers = true
normalize_comments = true
normalize_doc_attributes = true
imports_granularity = "Module"
group_imports = "StdExternalCrate"
reorder_impl_items = true
wrap_comments = true
+52
View File
@@ -0,0 +1,52 @@
[package]
name = "vllm-chat"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
asynk-strim-attr.workspace = true
easy-ext.workspace = true
futures.workspace = true
half.workspace = true
itertools.workspace = true
llm-multimodal.workspace = true
minijinja.workspace = true
minijinja-contrib.workspace = true
openai-harmony.workspace = true
reqwest.workspace = true
serde.workspace = true
serde-json-fmt.workspace = true
serde_json.workspace = true
serde_with.workspace = true
subenum.workspace = true
thiserror.workspace = true
thiserror-ext.workspace = true
tokio.workspace = true
tracing.workspace = true
trait-set.workspace = true
uuid.workspace = true
vllm-engine-core-client.workspace = true
vllm-llm.workspace = true
vllm-reasoning-parser.workspace = true
vllm-text.workspace = true
vllm-tokenizer.workspace = true
vllm-tool-parser.workspace = true
[dev-dependencies]
anyhow.workspace = true
bytes.workspace = true
clap.workspace = true
expect-test.workspace = true
rmp-serde.workspace = true
serial_test = { workspace = true, features = ["file_locks"] }
tempfile.workspace = true
tokio.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true
vllm-engine-core-client = { workspace = true, features = ["test-util"] }
zeromq.workspace = true
[lints]
workspace = true
+39
View File
@@ -0,0 +1,39 @@
# Chat Smoke Test
Start a fresh headless `vllm` engine:
```bash
source ../vllm/.venv/bin/activate
HF_HUB_OFFLINE=1 \
VLLM_LOGGING_LEVEL=DEBUG \
VLLM_CPU_KVCACHE_SPACE=2 \
VLLM_HOST_IP=127.0.0.1 \
VLLM_LOOPBACK_IP=127.0.0.1 \
python3 -m vllm.entrypoints.cli.main serve Qwen/Qwen3-0.6B \
--headless \
--data-parallel-address 127.0.0.1 \
--data-parallel-rpc-port 62100 \
--data-parallel-size-local 1 \
--max-model-len 512 \
--dtype float16
```
Run the Rust chat smoke test through the `vllm-chat` interface:
```bash
cargo run -p vllm-chat --example external_engine_chat_qwen -- \
--handshake-address tcp://127.0.0.1:62100 \
--host 127.0.0.1 \
--prompt 'What is the capital of France? Answer with one word.'
```
The example defaults to `Qwen/Qwen3-0.6B`. The current `vllm-chat`
request model stays text-first and supports either plain string content or
OpenAI-style text blocks, while the output side emits structured assistant
events and automatically separates reasoning blocks for supported models. Tool
use and multimodal inputs are still out of scope. The example uses the Rust
`tokenizers` library for the tokenizer itself, plus standard Hugging Face
config files to load the chat template and EOS metadata.
IMPORTANT: Restart `vllm` each time you run the smoke test. The current headless
engine cannot safely handle frontend reconnects after the client shuts down.
@@ -0,0 +1,178 @@
use std::time::Duration;
use anyhow::{Context, Result, bail};
use clap::Parser;
use futures::StreamExt as _;
use tracing_subscriber::EnvFilter;
use vllm_chat::{
AssistantBlockKind, AssistantMessageExt as _, ChatEvent, ChatLlm, ChatMessage, ChatRequest,
ChatRole, SamplingParams, load_model_backends,
};
use vllm_engine_core_client::{EngineCoreClient, EngineCoreClientConfig, TransportMode};
use vllm_llm::Llm;
use vllm_text::TextLlm;
// Command-line arguments of the chat smoke-test example, parsed with clap.
//
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's `--help` output.
#[derive(Debug, Parser)]
#[command(about = "Smoke-test the Rust chat facade against an external Qwen vLLM engine.")]
struct Args {
// Required. Forwarded as `TransportMode::HandshakeOwner::handshake_address`.
#[arg(long)]
handshake_address: String,
// Forwarded as `TransportMode::HandshakeOwner::engine_count`.
#[arg(long, default_value_t = 1)]
engine_count: usize,
// Model id used both to load the backends and as the client's `model_name`.
#[arg(long, default_value = "Qwen/Qwen3-0.6B")]
model: String,
// Forwarded as `TransportMode::HandshakeOwner::advertised_host`.
#[arg(long, default_value = "127.0.0.1")]
host: String,
// Converted to a `Duration` and forwarded as `ready_timeout`.
#[arg(long, default_value_t = 30)]
ready_timeout_secs: u64,
// Required. Sent as the single user message of the chat request.
#[arg(long)]
prompt: String,
}
// Value passed as `EngineCoreClientConfig::client_index` in `main`.
const CLIENT_INDEX: u32 = 0;
// Upper bound, in seconds, on how long `main` waits for the whole chat
// output stream before failing with a timeout error.
const OUTPUT_TIMEOUT_SECS: u64 = 120;
/// Build a request id of the form `rust-chat-smoke-<uuid>`, using a freshly
/// generated v4 UUID as the suffix.
fn unique_request_id() -> String {
    let suffix = uuid::Uuid::new_v4();
    format!("rust-chat-smoke-{suffix}")
}
/// Install a `tracing` subscriber for the example.
///
/// The filter is taken from the default environment variable when that
/// parses; otherwise it falls back to `vllm_engine_core_client=debug`.
fn init_tracing() {
    let filter = match EnvFilter::try_from_default_env() {
        Ok(from_env) => from_env,
        Err(_) => EnvFilter::new("vllm_engine_core_client=debug"),
    };
    // The result of `try_init` is deliberately discarded, so a failure to
    // install the subscriber does not abort the example.
    let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
}
#[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> {
init_tracing();
let args = Args::parse();
let loaded = load_model_backends(&args.model, Default::default())
.await
.with_context(|| format!("failed to load backends for {}", args.model))?;
let text_backend = loaded.text_backend;
let chat_backend = loaded.chat_backend;
let ready_timeout = Duration::from_secs(args.ready_timeout_secs);
let output_timeout = Duration::from_secs(OUTPUT_TIMEOUT_SECS);
let request_id = unique_request_id();
let client = EngineCoreClient::connect(EngineCoreClientConfig {
transport_mode: TransportMode::HandshakeOwner {
handshake_address: args.handshake_address.clone(),
advertised_host: args.host.clone(),
engine_count: args.engine_count,
ready_timeout,
local_input_address: None,
local_output_address: None,
},
coordinator_mode: None,
model_name: args.model.clone(),
client_index: CLIENT_INDEX,
})
.await
.context("failed to connect to external vLLM engine")?;
println!("model={}", args.model);
println!("tokenizer_source=tokenizers + hf-hub");
println!("chat_template_source=tokenizer_config.json or adjacent chat template file");
println!("handshake_address={}", args.handshake_address);
println!("engine_count={}", args.engine_count);
println!("input_address={}", client.input_address());
println!("output_address={}", client.output_address());
println!("engine_identities={:x?}", client.engine_identities());
let llm = Llm::new(client);
let chat = ChatLlm::new(TextLlm::new(llm, text_backend), chat_backend);
let request = ChatRequest {
messages: vec![ChatMessage::text(ChatRole::User, args.prompt.clone())],
sampling_params: SamplingParams {
temperature: Some(0.0),
..Default::default()
},
request_id: request_id.clone(),
..ChatRequest::for_test()
};
println!("request_id={request_id}");
println!("prompt={}", args.prompt);
let mut stream = chat.chat(request).await.context("failed to submit chat request")?;
let output = tokio::time::timeout(output_timeout, async {
let mut final_reasoning = String::new();
let mut final_text = String::new();
let mut final_output_token_count = 0usize;
let mut finish_reason = None;
let mut saw_start = false;
let mut saw_stream_output = false;
while let Some(event) = stream.next().await.transpose()? {
match event {
ChatEvent::Start { .. } => {
saw_start = true;
}
ChatEvent::BlockStart { kind, .. } => {
if saw_stream_output {
println!();
}
match kind {
AssistantBlockKind::Reasoning => print!("[reasoning] "),
AssistantBlockKind::Text => print!("[answer] "),
AssistantBlockKind::ToolCall => {}
}
saw_stream_output = true;
}
ChatEvent::ToolCallStart { name, .. } => {
if saw_stream_output {
println!();
}
print!("[tool:{name}] ");
saw_stream_output = true;
}
ChatEvent::LogprobsDelta { .. } => {}
ChatEvent::Done {
message,
output_token_count,
finish_reason: reason,
..
} => {
final_reasoning = message.reasoning().unwrap_or_default();
final_text = message.text();
final_output_token_count = output_token_count;
finish_reason = Some(reason);
break;
}
ChatEvent::BlockDelta { kind, delta, .. } => match kind {
AssistantBlockKind::Reasoning | AssistantBlockKind::Text => {
print!("{delta}");
}
AssistantBlockKind::ToolCall => {}
},
ChatEvent::ToolCallArgumentsDelta { delta, .. } => print!("{delta}"),
ChatEvent::BlockEnd { .. } | ChatEvent::ToolCallEnd { .. } => {}
}
}
println!();
if !saw_start {
bail!("chat stream ended without a start event");
}
Ok::<_, anyhow::Error>((
final_reasoning,
final_text,
final_output_token_count,
finish_reason,
))
})
.await
.context("timed out waiting for chat output")??;
chat.shutdown().await.context("failed to shut down chat client")?;
println!("final_reasoning={:?}", output.0);
println!("final_text={:?}", output.1);
println!("final_output_token_count={:?}", output.2);
println!("finish_reason={:?}", output.3);
Ok(())
}
+308
View File
@@ -0,0 +1,308 @@
use std::sync::Arc;
use tracing::info;
use vllm_text::backend::hf::{HfTextBackend, ResolvedModelFiles, load_model_config};
use vllm_text::tokenizer::DynTokenizer;
use vllm_text::{DynTextBackend, TextBackend as _};
use crate::backend::{
ChatBackend, DynChatBackend, LoadModelBackendsOptions, LoadedModelBackends,
NewChatOutputProcessorOptions,
};
use crate::error::Result;
use crate::multimodal::MultimodalModelInfo;
use crate::output::{
DefaultChatOutputProcessor, HarmonyChatOutputProcessor, validate_harmony_parser_overrides,
};
use crate::renderer::hf::{HfChatRenderer, MultimodalRenderInfo};
use crate::renderer::{DeepSeekV4ChatRenderer, DeepSeekV32ChatRenderer, DynChatRenderer};
use crate::request::ChatRequest;
use crate::{DynChatOutputProcessor, RendererSelection};
/// [`ChatBackend`] implementation built on Hugging Face model files.
pub struct HfChatBackend {
// Model id given at load time; handed to the default output processor.
model_id: String,
// `model_type` read from the model config (empty when the config has none);
// `"gpt_oss"` selects the Harmony output processor.
model_type: String,
// Tokenizer shared with the text backend; cloned into each default output
// processor.
tokenizer: DynTokenizer,
// Renderer chosen once at load time and shared by all requests.
chat_renderer: DynChatRenderer,
// Multimodal model info, as returned by `MultimodalModelInfo::from_paths`.
multimodal_model_info: Option<MultimodalModelInfo>,
}
impl HfChatBackend {
/// Load the chat backend from resolved Hugging Face model files.
///
/// Reads the model config to find the model type, builds the optional
/// multimodal model info, resolves the renderer selection in `options`
/// against the model type and constructs the matching renderer.
///
/// Returns an error if the model config cannot be loaded, if building the
/// multimodal info fails, or if the Hugging Face renderer fails to load.
pub fn from_resolved_model_files(
files: ResolvedModelFiles,
model_id: String,
options: LoadModelBackendsOptions,
tokenizer: DynTokenizer,
) -> Result<Self> {
let model_config = load_model_config(files.config_path.as_deref())?;
// A config without a model type yields the default (empty) value here.
let model_type = model_config.model_type().unwrap_or_default();
let multimodal_model_info = MultimodalModelInfo::from_paths(
model_id.clone(),
// An empty model type is passed on as `None`.
(!model_type.is_empty()).then_some(model_type.to_string()),
files.config_path.as_deref(),
files.preprocessor_config_path.as_deref(),
tokenizer.clone(),
)?;
let multimodal_render_info = resolve_multimodal_render_info(multimodal_model_info.as_ref());
// Turn the configured selection into a concrete renderer kind; after this
// `Auto` is not expected any more (see the `unreachable!` below).
let renderer = options.renderer.resolve(model_type);
let chat_renderer: DynChatRenderer = match renderer {
RendererSelection::Auto => unreachable!("renderer auto should be resolved above"),
// `options` is moved into the Hugging Face renderer here, so it must
// not be used after this match.
RendererSelection::Hf => Arc::new(HfChatRenderer::load(
&files,
options,
multimodal_render_info,
)?),
RendererSelection::DeepSeekV32 => Arc::new(DeepSeekV32ChatRenderer::new()),
RendererSelection::DeepSeekV4 => Arc::new(DeepSeekV4ChatRenderer::new()),
};
info!(
model_id,
model_type,
%renderer,
"loaded chat backend with Hugging Face model files"
);
Ok(Self {
model_id,
model_type: model_type.to_string(),
tokenizer,
chat_renderer,
multimodal_model_info,
})
}
}
impl ChatBackend for HfChatBackend {
fn chat_renderer(&self) -> DynChatRenderer {
self.chat_renderer.clone()
}
fn multimodal_model_info(&self) -> Option<&MultimodalModelInfo> {
self.multimodal_model_info.as_ref()
}
fn new_chat_output_processor(
&self,
request: &mut ChatRequest,
options: NewChatOutputProcessorOptions<'_>,
) -> Result<DynChatOutputProcessor> {
if self.model_type == "gpt_oss" {
validate_harmony_parser_overrides(options.tool_call_parser, options.reasoning_parser)?;
return Ok(Box::new(HarmonyChatOutputProcessor::new(request)?));
}
Ok(Box::new(DefaultChatOutputProcessor::new(
request,
&self.model_id,
self.tokenizer.clone(),
options.tool_call_parser,
options.reasoning_parser,
)?))
}
}
/// Load the Hugging Face text and chat backends for the given model id.
///
/// The model files are resolved once and shared by both backends, and the
/// chat backend reuses the tokenizer owned by the text backend.
pub(super) async fn load_model_backends(
    model_id: &str,
    options: LoadModelBackendsOptions,
) -> Result<LoadedModelBackends> {
    let files = ResolvedModelFiles::new(model_id).await?;

    let hf_text = HfTextBackend::from_resolved_model_files(files.clone(), model_id.to_string())?;
    let tokenizer = hf_text.tokenizer();

    let hf_chat =
        HfChatBackend::from_resolved_model_files(files, model_id.to_string(), options, tokenizer)?;

    let text_backend: DynTextBackend = Arc::new(hf_text);
    let chat_backend: DynChatBackend = Arc::new(hf_chat);
    Ok(LoadedModelBackends {
        text_backend,
        chat_backend,
    })
}
/// Derive the renderer-facing multimodal info from the model info.
///
/// Returns `None` when no model info is available; otherwise copies the
/// model's placeholder token into a new `MultimodalRenderInfo`.
fn resolve_multimodal_render_info(
    info: Option<&MultimodalModelInfo>,
) -> Option<MultimodalRenderInfo> {
    let model_info = info?;
    let placeholder_token = model_info.placeholder_token().to_string();
    Some(MultimodalRenderInfo { placeholder_token })
}
// Unit tests for renderer selection in `HfChatBackend`. Each test builds a
// backend from throw-away config files and checks the rendered prompt.
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tempfile::tempdir;
use vllm_text::backend::hf::TokenizerSource;
use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
use super::HfChatBackend;
use crate::RendererSelection;
use crate::backend::{ChatBackend, LoadModelBackendsOptions};
use crate::request::{ChatContent, ChatMessage, ChatRequest};
// Build a request holding a single user message with the given text.
fn request_with_user_text(text: &str) -> ChatRequest {
ChatRequest {
request_id: "renderer-selection-test".to_string(),
messages: vec![ChatMessage::User {
content: ChatContent::Text(text.to_string()),
}],
..ChatRequest::for_test()
}
}
// Write `content` to `path`, panicking on I/O failure.
fn write_json(path: &std::path::Path, content: &str) {
std::fs::write(path, content).unwrap();
}
// Write the two given JSON documents into a fresh temporary directory as
// `config.json` and `tokenizer_config.json` and describe them as resolved
// model files. All other optional paths are left unset.
fn resolved_files(
config_json: &str,
tokenizer_config_json: &str,
) -> vllm_text::backend::hf::ResolvedModelFiles {
let dir = tempdir().unwrap();
// NOTE(review): `keep()` presumably persists the directory so the files
// outlive this function, which also means it is not cleaned up
// automatically - confirm against the tempfile docs.
let root = dir.keep();
let config_path = root.join("config.json");
let tokenizer_config_path = root.join("tokenizer_config.json");
write_json(&config_path, config_json);
write_json(&tokenizer_config_path, tokenizer_config_json);
vllm_text::backend::hf::ResolvedModelFiles {
// This path is never written by the helper; the tests inject
// `TestTokenizer` instead.
tokenizer: TokenizerSource::HuggingFace(PathBuf::from("/tmp/unused-tokenizer.json")),
tokenizer_config_path: Some(tokenizer_config_path),
generation_config_path: None,
preprocessor_config_path: None,
chat_template_path: None,
config_path: Some(config_path),
}
}
// Tokenizer stub: encodes to no tokens, decodes to the empty string and
// knows no token ids.
struct TestTokenizer;
impl Tokenizer for TestTokenizer {
fn encode(
&self,
_text: &str,
_add_special_tokens: bool,
) -> vllm_text::tokenizer::Result<Vec<u32>> {
Ok(Vec::new())
}
fn decode(
&self,
_token_ids: &[u32],
_skip_special_tokens: bool,
) -> vllm_text::tokenizer::Result<String> {
Ok(String::new())
}
fn token_to_id(&self, _token: &str) -> Option<u32> {
None
}
}
fn test_tokenizer() -> DynTokenizer {
Arc::new(TestTokenizer)
}
// Build a backend with the given renderer selection and config files, then
// render a one-message request ("hello") and return the prompt text.
fn render_prompt(
renderer: RendererSelection,
config_json: &str,
tokenizer_config_json: &str,
) -> String {
let backend = HfChatBackend::from_resolved_model_files(
resolved_files(config_json, tokenizer_config_json),
"test-model".to_string(),
LoadModelBackendsOptions {
renderer,
chat_template_content_format: Default::default(),
chat_template: None,
default_chat_template_kwargs: HashMap::new(),
},
test_tokenizer(),
)
.unwrap();
backend
.chat_renderer()
.render(&request_with_user_text("hello"))
.unwrap()
.prompt
.into_text()
.expect("renderer should return text prompt")
}
// `Auto` + model type `deepseek_v32` selects the DeepSeek renderer; no chat
// template is needed in the tokenizer config.
#[test]
fn auto_uses_deepseek_renderer_for_deepseek_v32_model_type() {
let prompt = render_prompt(
RendererSelection::Auto,
r#"{"model_type":"deepseek_v32"}"#,
r#"{}"#,
);
assert_eq!(
prompt,
"<begin▁of▁sentence><User>hello<Assistant></think>"
);
}
// `Auto` + any other model type renders through the chat template from the
// tokenizer config.
#[test]
fn auto_uses_hf_renderer_for_other_model_types() {
let prompt = render_prompt(
RendererSelection::Auto,
r#"{"model_type":"qwen2"}"#,
r#"{"chat_template":"{{ messages[0].content }}"}"#,
);
assert_eq!(prompt, "hello");
}
// An explicit DeepSeek selection wins over the model type in the config.
#[test]
fn explicit_deepseek_renderer_overrides_generic_model_type() {
let prompt = render_prompt(
RendererSelection::DeepSeekV32,
r#"{"model_type":"qwen2"}"#,
r#"{"chat_template":"{{ messages[0].content }}"}"#,
);
assert_eq!(
prompt,
"<begin▁of▁sentence><User>hello<Assistant></think>"
);
}
// An explicit Hugging Face selection wins over a DeepSeek model type.
#[test]
fn explicit_hf_renderer_overrides_deepseek_v32_model_type() {
let prompt = render_prompt(
RendererSelection::Hf,
r#"{"model_type":"deepseek_v32"}"#,
r#"{"chat_template":"{{ messages[0].content }}"}"#,
);
assert_eq!(prompt, "hello");
}
// The model type is also picked up when it only appears under `text_config`.
#[test]
fn auto_uses_nested_text_config_model_type() {
let prompt = render_prompt(
RendererSelection::Auto,
r#"{"text_config":{"model_type":"deepseek_v32","num_attention_heads":32}}"#,
r#"{}"#,
);
assert_eq!(
prompt,
"<begin▁of▁sentence><User>hello<Assistant></think>"
);
}
}
+86
View File
@@ -0,0 +1,86 @@
use std::collections::HashMap;
use std::sync::Arc;
use serde_json::Value;
use vllm_text::{DynTextBackend, TextBackend};
use crate::error::Result;
use crate::multimodal::MultimodalModelInfo;
use crate::output::DynChatOutputProcessor;
use crate::renderer::DynChatRenderer;
use crate::request::ChatRequest;
use crate::{ChatTemplateContentFormatOption, ParserSelection, RendererSelection};
pub mod hf;
/// Options for creating a new chat output processor.
///
/// Both selections are borrowed, so building the options does not clone them.
pub struct NewChatOutputProcessorOptions<'a> {
/// Parser selection to use for tool calls.
pub tool_call_parser: &'a ParserSelection,
/// Parser selection to use for reasoning content.
pub reasoning_parser: &'a ParserSelection,
}
/// Minimal prompt-processing backend needed by `vllm-chat`.
///
/// Implementations must be `Send + Sync`; they are shared as
/// `Arc<dyn ChatBackend>`.
pub trait ChatBackend: Send + Sync {
/// Return the renderer used for chat-prompt construction.
fn chat_renderer(&self) -> DynChatRenderer;
/// Return model files/config needed for request-scoped multimodal
/// preprocessing, if supported.
///
/// The default implementation returns `None`.
fn multimodal_model_info(&self) -> Option<&MultimodalModelInfo> {
None
}
/// Create a request-scoped output processor after request-level adjustments
/// are applied.
///
/// The request is passed mutably, so an implementation may modify it.
fn new_chat_output_processor(
&self,
request: &mut ChatRequest,
options: NewChatOutputProcessorOptions<'_>,
) -> Result<DynChatOutputProcessor>;
}
/// Shared trait-object form of [`ChatBackend`].
pub type DynChatBackend = Arc<dyn ChatBackend>;
/// Convenience trait for backends that can serve both raw text generation and
/// chat templating.
///
/// This is mainly useful in tests and small examples, where one mock/backend
/// often implements both sides and callers want `ChatLlm` to wire the shared
/// object into `TextLlm` automatically.
///
/// The trait has no methods of its own and is never implemented by hand; the
/// blanket impl below covers every type that implements both supertraits.
pub trait ChatTextBackend: ChatBackend + TextBackend {}
// Blanket impl; `?Sized` lets it apply to unsized types as well.
impl<T> ChatTextBackend for T where T: ChatBackend + TextBackend + ?Sized {}
/// Shared trait-object form of [`ChatTextBackend`].
pub type DynChatTextBackend = Arc<dyn ChatTextBackend>;
/// Frontend-side chat backend loading options.
///
/// With `Default`, `chat_template` is `None` and
/// `default_chat_template_kwargs` is empty; the other two fields take the
/// defaults of their own types.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct LoadModelBackendsOptions {
/// Which chat renderer implementation to use.
pub renderer: RendererSelection,
/// How to serialize `message.content` when rendering the chat template.
pub chat_template_content_format: ChatTemplateContentFormatOption,
/// Optional server-default chat template override, provided either as an
/// inline template or as a path to a template file.
pub chat_template: Option<String>,
/// Optional server-default keyword arguments merged into every
/// chat-template render before request-level `chat_template_kwargs`.
pub default_chat_template_kwargs: HashMap<String, Value>,
}
/// Shared backends loaded from a model id.
///
/// Returned by [`load_model_backends`].
pub struct LoadedModelBackends {
/// Backend for raw text generation.
pub text_backend: DynTextBackend,
/// Backend for chat rendering and chat output processing.
pub chat_backend: DynChatBackend,
}
/// Load text and chat backends for the given model id.
///
/// This is the backend-agnostic entry point. Hugging Face is the only backend
/// family available today, so the call is forwarded to it unchanged.
pub async fn load_model_backends(
    model_id: &str,
    options: LoadModelBackendsOptions,
) -> Result<LoadedModelBackends> {
    let loaded = hf::load_model_backends(model_id, options).await?;
    Ok(loaded)
}
+80
View File
@@ -0,0 +1,80 @@
use thiserror::Error;
use thiserror_ext::Macro;
// Type-erased, thread-safe error used as the `#[source]` of variants that
// wrap failures from parser or Harmony code.
type BoxedError = Box<dyn std::error::Error + Send + Sync>;
/// Errors produced by the chat layer.
///
/// The `#[error(...)]` strings are the user-facing messages.
#[derive(Debug, Error, Macro)]
#[thiserror_ext(macro(path = "crate::error"))]
pub enum Error {
/// The request carried no messages.
#[error("chat request must contain at least one message")]
EmptyMessages,
/// Continuing the final message was requested, but the last message is not
/// an assistant message.
#[error("cannot continue the final message when the last message is not from the assistant")]
ContinueFinalAssistantWithoutFinalAssistant,
/// A chat template is needed but none is configured.
#[error("chat template is required but none was configured")]
MissingChatTemplate,
/// Chat template failure; the payload is the detail text.
#[error("chat template error: {0}")]
ChatTemplate(String),
/// The selected renderer cannot handle multimodal input.
#[error("multimodal input is not supported by this chat renderer")]
UnsupportedMultimodalRenderer,
/// A specific kind of multimodal content is not supported.
#[error("unsupported multimodal content: {0}")]
UnsupportedMultimodalContent(&'static str),
/// Multimodal preprocessing failed; the payload is the detail text.
#[error("multimodal preprocessing error: {0}")]
Multimodal(#[message] String),
/// No parser of the given kind is available for this model.
#[error("{kind} parsing is not available for model `{model_id}`")]
ParserUnavailableForModel {
kind: &'static str,
model_id: String,
},
/// Parsing of the given kind is switched off in the frontend configuration.
#[error("{kind} parsing is disabled by frontend configuration")]
ParserDisabled { kind: &'static str },
/// A parser was requested by name but is not registered. The message lists
/// the registered names when there are any.
#[error(
"{kind} parser `{name}` is not registered{}",
available_parser_hint(.available_names)
)]
ParserUnavailableByName {
kind: &'static str,
name: String,
available_names: Vec<String>,
},
/// A named parser exists but failed to initialize.
#[error("failed to initialize {kind} parser `{name}`")]
ParserInitialization {
kind: &'static str,
name: String,
#[source]
error: BoxedError,
},
/// A generic parser override was supplied for a `gpt_oss` model, which uses
/// Harmony output parsing instead.
#[error(
"gpt_oss uses native Harmony output parsing; generic {kind} parser override `{selection}` is not supported"
)]
HarmonyParserOverrideUnsupported {
kind: &'static str,
selection: String,
},
/// Harmony output parsing failed; the cause is attached as the source.
#[error("harmony output parsing failed")]
HarmonyOutputParsing {
#[source]
error: BoxedError,
},
/// The prompt has more tokens than the model's maximum context length.
#[error(
"this model's maximum context length is {max_model_len} tokens, \
but the prompt contains {prompt_len} input tokens"
)]
PromptTooLong { max_model_len: u32, prompt_len: u32 },
/// The stream for a request ended without a terminal output.
#[error("chat request stream `{request_id}` closed before terminal output")]
StreamClosedBeforeTerminalOutput { request_id: String },
/// Internal consistency check on the tool-call stream state failed.
#[error("tool call stream state is inconsistent: {message}")]
ToolCallStreamInvariant { message: String },
/// Error from the text layer, passed through with its own message and
/// convertible with `?` via `From`.
#[error(transparent)]
Text(#[from] vllm_text::Error),
}
/// Result type used throughout this crate, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
/// Build the suffix that lists the registered parser names in user-facing
/// error messages.
///
/// Returns an empty string when no names are given, otherwise
/// ` (choose from: a, b, ...)` with a leading space.
fn available_parser_hint(available_names: &[String]) -> String {
    match available_names {
        [] => String::new(),
        names => {
            let joined = names.join(", ");
            format!(" (choose from: {joined})")
        }
    }
}
+183
View File
@@ -0,0 +1,183 @@
use std::ops::Deref;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use vllm_text::{DecodedLogprobs, DecodedPromptLogprobs};
use crate::FinishReason;
/// One finalized assistant tool call.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AssistantToolCall {
/// Identifier of this call.
pub id: String,
/// Name of the tool being called.
pub name: String,
/// Call arguments as one string.
// NOTE(review): presumably JSON-encoded text - confirm with the tool parsers.
pub arguments: String,
}
/// Semantic kind of one assistant output block.
///
/// This is the payload-free counterpart of [`AssistantContentBlock`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AssistantBlockKind {
/// Visible final-answer text.
Text,
/// Extracted reasoning content.
Reasoning,
/// A tool-call block.
ToolCall,
}
/// One structured assistant output block.
///
/// Each variant corresponds to the [`AssistantBlockKind`] of the same name.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AssistantContentBlock {
/// Visible final-answer text.
Text { text: String },
/// Extracted reasoning content.
Reasoning { text: String },
/// One finalized tool call.
ToolCall(AssistantToolCall),
}
impl AssistantContentBlock {
/// Return the semantic kind of this block.
pub fn kind(&self) -> AssistantBlockKind {
match self {
Self::Text { .. } => AssistantBlockKind::Text,
Self::Reasoning { .. } => AssistantBlockKind::Reasoning,
Self::ToolCall(..) => AssistantBlockKind::ToolCall,
}
}
/// Return this block as one finalized tool call, if applicable.
pub fn as_tool_call(&self) -> Option<&AssistantToolCall> {
match self {
Self::ToolCall(call) => Some(call),
_ => None,
}
}
}
#[easy_ext::ext(AssistantMessageExt)]
impl [AssistantContentBlock] {
    /// Join the text of every final-answer block, in order, into one string.
    pub fn text(&self) -> String {
        let mut joined = String::new();
        for block in self.iter() {
            if let AssistantContentBlock::Text { text } = block {
                joined.push_str(text);
            }
        }
        joined
    }

    /// Join the text of every reasoning block, in order, into one string.
    ///
    /// Returns `None` when the joined reasoning text is empty.
    pub fn reasoning(&self) -> Option<String> {
        let mut joined = String::new();
        for block in self.iter() {
            if let AssistantContentBlock::Reasoning { text } = block {
                joined.push_str(text);
            }
        }
        if joined.is_empty() {
            None
        } else {
            Some(joined)
        }
    }

    /// Tell whether at least one reasoning block holds non-empty text.
    pub fn has_reasoning(&self) -> bool {
        self.iter().any(|block| {
            matches!(block, AssistantContentBlock::Reasoning { text } if !text.is_empty())
        })
    }

    /// Iterate over the finalized tool calls in the order they appear.
    pub fn tool_calls(&self) -> impl Iterator<Item = &AssistantToolCall> {
        self.iter().filter_map(|block| block.as_tool_call())
    }

    /// Tell whether at least one block is a tool call.
    pub fn has_tool_calls(&self) -> bool {
        self.iter().any(|block| block.as_tool_call().is_some())
    }
}
/// Final structured assistant message assembled from the event stream.
///
/// The default value is a message with no blocks.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct AssistantMessage {
/// Content blocks of the message, in order.
pub content: Vec<AssistantContentBlock>,
}
// Dereferences to the block slice, so slice methods - including the
// `AssistantMessageExt` helpers defined on `[AssistantContentBlock]` - can be
// called directly on a message.
impl Deref for AssistantMessage {
type Target = [AssistantContentBlock];
fn deref(&self) -> &Self::Target {
&self.content
}
}
impl AssistantMessage {
/// Push one new block to the end of the message content.
pub(crate) fn push_block(&mut self, block: AssistantContentBlock) {
self.content.push(block);
}
}
/// Streamed chat event emitted by [`crate::ChatEventStream`].
///
/// `Done` is the terminal event and carries the assembled message.
// NOTE(review): the exact ordering guarantees between block, tool-call and
// logprobs events are not visible in this file - confirm with the stream
// implementation.
#[derive(Debug, Clone, PartialEq)]
pub enum ChatEvent {
/// The request was accepted, streaming has started, and prompt metadata is
/// ready.
Start {
/// The actual prompt token IDs for this request.
prompt_token_ids: Arc<[u32]>,
/// Once-only prompt logprobs metadata, when requested.
prompt_logprobs: Option<DecodedPromptLogprobs>,
},
/// A new assistant output block has started.
BlockStart {
/// Index of the block.
index: usize,
/// Kind of the block that is starting.
kind: AssistantBlockKind,
},
/// A newly observed delta for one open assistant output block.
BlockDelta {
/// Index of the block this delta belongs to.
index: usize,
/// Kind of the block this delta belongs to.
kind: AssistantBlockKind,
/// The new piece of text.
delta: String,
},
/// Per-decoded-update sample metadata: logprobs and/or output token IDs.
LogprobsDelta {
logprobs: Option<DecodedLogprobs>,
token_ids: Vec<u32>,
},
/// One assistant output block has ended.
BlockEnd {
/// Index of the block that ended.
index: usize,
/// The complete block.
block: AssistantContentBlock,
},
/// One tool call has started.
ToolCallStart {
index: usize,
id: String,
name: String,
},
/// One incremental tool-call arguments delta for the currently open tool
/// call.
ToolCallArgumentsDelta { index: usize, delta: String },
/// One tool call has ended.
ToolCallEnd {
index: usize,
/// The complete tool call.
call: AssistantToolCall,
},
/// Terminal event carrying the final assembled assistant message and finish
/// metadata.
Done {
/// The final assembled assistant message.
message: AssistantMessage,
/// Number of prompt tokens actually sent to the engine after chat
/// template rendering and tokenization.
prompt_token_count: usize,
/// Number of output tokens generated.
output_token_count: usize,
/// Why generation finished.
finish_reason: FinishReason,
/// Connector-specific KV transfer parameters for disaggregated serving.
kv_transfer_params: Option<serde_json::Value>,
},
}
+249
View File
@@ -0,0 +1,249 @@
//! Minimal chat facade above [`vllm_text`].
//!
//! This crate keeps the northbound boundary intentionally small:
//! `messages -> rendered prompt -> tokenized prompt -> engine request ->
//! streamed structured assistant events`. The request side remains text-first,
//! while the response side can emit structured reasoning and final-answer
//! blocks. It is closer to vLLM's internal chat-rendering flow than to a full
//! OpenAI-compatible surface.
pub use backend::hf::HfChatBackend;
pub use backend::{
ChatBackend, ChatTextBackend, DynChatBackend, DynChatTextBackend, LoadModelBackendsOptions,
LoadedModelBackends, NewChatOutputProcessorOptions, load_model_backends,
};
pub use error::{Error, Result};
pub use event::{
AssistantBlockKind, AssistantContentBlock, AssistantMessage, AssistantMessageExt,
AssistantToolCall, ChatEvent,
};
use futures::{StreamExt, TryStreamExt as _};
pub use output::{
ChatOutputProcessor, DefaultChatOutputProcessor, DynChatOutputProcessor,
HarmonyChatOutputProcessor,
};
pub use parser::ParserSelection;
pub use parser::reasoning::{
ReasoningDelta, ReasoningError, ReasoningParser, ReasoningParserFactory,
};
pub use parser::tool::{ToolParser, ToolParserError, ToolParserFactory};
pub use renderer::hf::ChatTemplateContentFormatOption;
pub use renderer::{
ChatRenderer, DeepSeekV4ChatRenderer, DeepSeekV32ChatRenderer, DynChatRenderer, RenderedPrompt,
RendererSelection,
};
pub use request::{
ChatContent, ChatContentPart, ChatMessage, ChatOptions, ChatRequest, ChatRole, ChatTool,
ChatToolChoice, GenerationPromptMode, ReasoningEffort, SamplingParams,
};
pub use stream::{ChatEventStream, ChatEventStreamTrait, CollectedAssistantMessage};
pub use vllm_llm::FinishReason;
mod backend;
mod error;
mod event;
pub mod multimodal;
mod output;
mod parser;
mod renderer;
mod request;
mod stream;
use vllm_engine_core_client::EngineCoreClient;
use vllm_engine_core_client::protocol::ModelDtype;
use vllm_llm::Llm;
use vllm_text::{TextLlm, TextRequest};
/// Check that explicitly named parser overrides are registered, without
/// starting any request processing.
///
/// Only `ParserSelection::Explicit` selections are checked; every other
/// selection is accepted as is. The tool-call parser is checked first, then
/// the reasoning parser.
///
/// Returns `Error::ParserUnavailableByName`, including the list of registered
/// names, for the first explicit name that is not registered.
pub fn validate_parser_overrides(
    tool_call_parser: &ParserSelection,
    reasoning_parser: &ParserSelection,
) -> Result<()> {
    let tool_parsers = ToolParserFactory::global();
    match tool_call_parser {
        ParserSelection::Explicit(requested) if !tool_parsers.contains(requested) => {
            return Err(Error::ParserUnavailableByName {
                kind: "tool",
                name: requested.clone(),
                available_names: tool_parsers.list(),
            });
        }
        _ => {}
    }

    let reasoning_parsers = ReasoningParserFactory::global();
    match reasoning_parser {
        ParserSelection::Explicit(requested) if !reasoning_parsers.contains(requested) => {
            return Err(Error::ParserUnavailableByName {
                kind: "reasoning",
                name: requested.clone(),
                available_names: reasoning_parsers.list(),
            });
        }
        _ => {}
    }

    Ok(())
}
/// Structured chat facade above [`TextLlm`].
///
/// This layer stays above raw text semantics: it takes care of chat-template
/// rendering, exposes structured assistant events, and adds chat-specific
/// request semantics such as tool calls.
pub struct ChatLlm {
/// Underlying text-generation facade.
text: TextLlm,
/// Chat backend that supplies the renderer and output processors.
backend: DynChatBackend,
/// Effective model dtype reported by the engine.
model_dtype: Option<ModelDtype>,
/// Tool-call parser selection.
tool_call_parser: ParserSelection,
/// Reasoning parser selection.
reasoning_parser: ParserSelection,
}
impl ChatLlm {
/// Create a new chat facade from a text-generation facade plus a chat
/// backend.
pub fn new(text: TextLlm, backend: DynChatBackend) -> Self {
let model_dtype = text.engine_core_client().model_dtype();
Self {
text,
backend,
model_dtype,
tool_call_parser: ParserSelection::Auto,
reasoning_parser: ParserSelection::Auto,
}
}
/// Convenience constructor for one shared backend object that implements
/// both text and chat responsibilities.
pub fn from_shared_backend(llm: Llm, backend: DynChatTextBackend) -> Self {
let text = TextLlm::new(llm, backend.clone());
Self::new(text, backend)
}
/// Set tool-call parser selection.
pub fn with_tool_call_parser(mut self, selection: ParserSelection) -> Self {
self.tool_call_parser = selection;
self
}
/// Set reasoning parser selection.
pub fn with_reasoning_parser(mut self, selection: ParserSelection) -> Self {
self.reasoning_parser = selection;
self
}
/// Override the effective model dtype used for multimodal tensor encoding.
pub fn with_model_dtype(mut self, model_dtype: Option<ModelDtype>) -> Self {
self.model_dtype = model_dtype;
self
}
/// Expose the underlying text facade for raw text-generation routes such as
/// `/v1/completions`.
pub fn text(&self) -> &TextLlm {
&self.text
}
/// Return the model ID reported by the underlying text backend.
pub fn model_id(&self) -> &str {
self.text.model_id()
}
/// Expose the underlying engine-core client for low-level utility/admin
/// calls.
pub fn engine_core_client(&self) -> &EngineCoreClient {
self.text.engine_core_client()
}
/// Render, tokenize, and submit one chat request.
pub async fn chat(&self, mut request: ChatRequest) -> Result<ChatEventStream> {
request.validate()?;
let output_processor = self.backend.new_chat_output_processor(
&mut request,
NewChatOutputProcessorOptions {
tool_call_parser: &self.tool_call_parser,
reasoning_parser: &self.reasoning_parser,
},
)?;
let rendered = self.backend.chat_renderer().render(&request)?;
let (prompt, mm_features) = multimodal::finalize_rendered_prompt(
&request,
rendered,
self.backend.multimodal_model_info(),
self.model_dtype,
)
.await?;
let text_request = TextRequest {
request_id: request.request_id.clone(),
prompt,
mm_features,
sampling_params: request.sampling_params,
decode_options: request.decode_options,
intermediate: request.intermediate,
priority: request.priority,
cache_salt: request.cache_salt,
add_special_tokens: request.add_special_tokens,
data_parallel_rank: request.data_parallel_rank,
};
let decoded_stream = self.text.generate(text_request).await?.map_err(Error::from).boxed();
let structured_stream = output_processor.process(decoded_stream)?;
Ok(ChatEventStream::new(request.request_id, structured_stream))
}
/// Shut down the underlying LLM client and its background tasks.
pub async fn shutdown(self) -> Result<()> {
self.text.shutdown().await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
    use thiserror_ext::AsReport;

    use super::{ParserSelection, validate_parser_overrides};
    use crate::parser::reasoning::names;

    /// Build an explicit parser selection from a parser name.
    fn explicit(name: impl ToString) -> ParserSelection {
        ParserSelection::Explicit(name.to_string())
    }

    #[test]
    fn validate_parser_overrides_accepts_registered_names() {
        let tool = explicit("llama3_json");
        let reasoning = explicit(names::QWEN3);
        validate_parser_overrides(&tool, &reasoning).unwrap();
    }

    #[test]
    fn validate_parser_overrides_accepts_auto_and_none() {
        let tool = ParserSelection::Auto;
        let reasoning = ParserSelection::None;
        validate_parser_overrides(&tool, &reasoning).unwrap();
    }

    #[test]
    fn validate_parser_overrides_rejects_unknown_tool_parser() {
        let tool = explicit("definitely_missing_tool_parser");
        let failure = validate_parser_overrides(&tool, &ParserSelection::Auto).unwrap_err();
        // Snapshot of the full error report, including the registered names.
        expect_test::expect!["tool parser `definitely_missing_tool_parser` is not registered (choose from: deepseek_v3, deepseek_v31, deepseek_v32, deepseek_v4, gemma4, glm45, glm47, hermes, kimi_k2, llama3_json, llama4_json, minimax_m2, mistral, qwen3_coder, qwen3_xml)"].assert_eq(&failure.to_report_string());
    }

    #[test]
    fn validate_parser_overrides_rejects_unknown_reasoning_parser() {
        let reasoning = explicit("definitely_missing_reasoning_parser");
        let failure = validate_parser_overrides(&ParserSelection::Auto, &reasoning).unwrap_err();
        // Snapshot of the full error report, including the registered names.
        expect_test::expect!["reasoning parser `definitely_missing_reasoning_parser` is not registered (choose from: cohere_cmd, deepseek_r1, deepseek_v3, deepseek_v4, gemma4, glm45, kimi, kimi_k2, minimax_m2, nemotron_v3, qwen3, step3)"].assert_eq(&failure.to_report_string());
    }
}
+775
View File
@@ -0,0 +1,775 @@
//! Chat-layer multimodal image preparation.
//!
//! This module owns the narrow image-only multimodal path for chat requests:
//! it extracts image parts from structured chat messages, fetches and
//! preprocesses them through `llm-multimodal`, expands rendered prompt
//! placeholders after tokenization, and builds the engine-facing
//! `MmFeatures` payload.
//!
//! Raw media stays above `vllm-text`; this module lowers it into token IDs and
//! opaque tensor payloads before the request is handed to text generation.
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use std::sync::{Arc, LazyLock, Once};
use itertools::izip;
use llm_multimodal::{
AsyncMultiModalTracker, FieldLayout, ImagePreProcessor, ImageProcessorRegistry, MediaConnector,
MediaConnectorConfig, MediaContentPart, Modality, ModelMetadata, ModelProcessorSpec,
ModelRegistry, PreProcessorConfig, PreprocessedImages, PromptReplacement, TokenResolver,
TrackedMedia,
};
use tracing::warn;
use vllm_engine_core_client::protocol::ModelDtype;
use vllm_engine_core_client::protocol::multimodal::{
MmBatchedField, MmFeatureSpec, MmFeatures, MmField, MmFieldElem, MmFlatField, MmKwargsItem,
MmSharedField, MmSlice, PlaceholderRange, SliceSpec,
};
use vllm_engine_core_client::protocol::tensor::WireTensor;
use vllm_text::Prompt;
use vllm_text::tokenizer::{DynTokenizer, Tokenizer};
use crate::error::{Error, Result, bail_multimodal, multimodal};
use crate::renderer::RenderedPrompt;
use crate::request::{ChatContent, ChatContentPart, ChatMessage, ChatRequest};
mod tensor;
/// Resolved multimodal support for one loaded model.
///
/// Bundles everything the chat layer needs to turn image parts into
/// engine-facing features: model context, resolved model spec, image
/// preprocessor, and the media connector used for fetching.
#[derive(Clone)]
pub struct MultimodalModelInfo {
/// Model identity, parsed config, and tokenizer access.
context: MultimodalModelContext,
/// Resolved model spec: placeholder tokens and tensor field layouts.
spec: ResolvedMultimodalSpec,
/// Image preprocessor together with its loaded configuration.
image_processor: ResolvedImageProcessor,
/// Shared media connector; handed to the per-request media tracker when
/// images are fetched.
media_connector: Arc<MediaConnector>,
}
/// Model metadata and tokenizer access shared by all multimodal specs.
#[derive(Clone)]
struct MultimodalModelContext {
/// Model identifier; used for model-spec and image-processor lookup.
model_id: String,
/// Optional model type string; passed to the image-processor registry
/// lookup alongside `model_id`.
model_type: Option<String>,
/// Parsed `config.json` contents, or an empty JSON object when no config
/// path was supplied.
config: serde_json::Value,
/// Tokenizer wrapped so it can be handed to `llm-multimodal` as a
/// `TokenResolver`.
tokenizer: TokenizerResolver,
}
impl MultimodalModelContext {
    /// Borrow this context as the `ModelMetadata` view expected by
    /// `llm-multimodal` lookups and spec methods.
    fn metadata(&self) -> ModelMetadata<'_> {
        let Self {
            model_id,
            tokenizer,
            config,
            ..
        } = self;
        ModelMetadata {
            model_id,
            tokenizer,
            config,
        }
    }

    /// Borrow the wrapped tokenizer as a plain `Tokenizer` trait object.
    fn tokenizer(&self) -> &dyn Tokenizer {
        let shared = &self.tokenizer.0;
        shared.as_ref()
    }

    /// Look up the static model processor spec matching this model.
    ///
    /// The registry is built once, on first use, and shared afterwards.
    fn resolve_model_spec(&self) -> Option<&'static dyn ModelProcessorSpec> {
        static MODEL_SPECS: LazyLock<ModelRegistry> = LazyLock::new(ModelRegistry::new);
        let metadata = self.metadata();
        MODEL_SPECS.lookup(&metadata)
    }

    /// Look up the static image preprocessor matching this model's ID and
    /// optional model type.
    ///
    /// The registry is built once, on first use, and shared afterwards.
    fn resolve_image_processor(&self) -> Option<&'static dyn ImagePreProcessor> {
        static IMAGE_PROCESSORS: LazyLock<ImageProcessorRegistry> =
            LazyLock::new(ImageProcessorRegistry::with_defaults);
        let model_type = self.model_type.as_deref();
        IMAGE_PROCESSORS.find(&self.model_id, model_type)
    }
}
/// Static model-specific prompt and tensor-layout behavior.
///
/// All values are resolved once from the raw spec and the tokenizer when the
/// spec is constructed, so request handling only reads them.
#[derive(Clone)]
struct ResolvedMultimodalSpec {
/// The registered spec this struct was resolved from; still consulted per
/// request for prompt replacements.
raw: &'static dyn ModelProcessorSpec,
/// Placeholder token string as it appears in the rendered prompt.
placeholder_token: String,
/// Tokenizer ID of `placeholder_token`; this is the marker searched for in
/// the tokenized prompt during expansion.
placeholder_marker_token_id: u32,
/// Token ID reported by the spec's `placeholder_token_id`; replacement
/// tokens equal to it are flagged in the `is_embed` mask.
placeholder_embed_token_id: u32,
/// Per-key tensor layout declarations taken from the spec. Keys without an
/// entry are sent as shared fields.
field_layouts: HashMap<String, FieldLayout>,
/// Keys whose fields are marked `keep_on_cpu` in the built features.
keep_on_cpu_keys: HashSet<String>,
}
impl ResolvedMultimodalSpec {
fn new(raw: &'static dyn ModelProcessorSpec, context: &MultimodalModelContext) -> Result<Self> {
let metadata = context.metadata();
let placeholder_token =
raw.placeholder_token(&metadata).map_err(|error| multimodal!("{error}"))?;
// This is the rendered prompt marker, so resolve it from the token
// string itself. Do not use `ModelProcessorSpec::placeholder_token_id()`:
// for some specs that ID is the replacement vision/patch token,
// not necessarily the token ID of `placeholder_token`.
let placeholder_marker_token_id =
context.tokenizer().token_to_id(&placeholder_token).ok_or_else(|| {
multimodal!(
"placeholder token `{placeholder_token}` is not in the tokenizer vocabulary"
)
})?;
let placeholder_embed_token_id =
raw.placeholder_token_id(&metadata).map_err(|error| multimodal!("{error}"))? as u32;
Ok(Self {
raw,
placeholder_token,
placeholder_marker_token_id,
placeholder_embed_token_id,
field_layouts: raw.field_layouts(),
keep_on_cpu_keys: raw.keep_on_cpu_keys().into_iter().collect(),
})
}
fn prompt_replacements(
&self,
context: &MultimodalModelContext,
preprocessed: &PreprocessedImages,
) -> Result<Vec<PromptReplacement>> {
self.raw
.prompt_replacements(&context.metadata(), preprocessed)
.map_err(|error| multimodal!("{error}"))
}
}
/// Static image preprocessor plus its loaded config.
#[derive(Clone)]
struct ResolvedImageProcessor {
/// Registered image preprocessor selected for the model.
raw: &'static dyn ImagePreProcessor,
/// Preprocessor configuration: parsed from `preprocessor_config.json` when a
/// path was supplied, otherwise the default configuration.
config: PreProcessorConfig,
}
/// Request-scoped fetched media, kept together with tracker UUID metadata.
struct FetchedImageMedia {
/// Fetched image frames as returned by the media tracker for the image
/// modality.
frames: Vec<Arc<llm_multimodal::ImageFrame>>,
/// UUID metadata returned by the media tracker for the image modality;
/// zipped with `frames` when features are built. A `None` entry falls back
/// to the frame hash as identifier.
uuids: Vec<Option<String>>,
}
impl MultimodalModelInfo {
/// Load and resolve multimodal support from model files.
///
/// Returns `Ok(Some(_))` only when both the model spec and image processor
/// are registered. File read/parse failures are real errors; unsupported
/// model families are logged and returned as `Ok(None)`.
pub fn from_paths(
model_id: String,
model_type: Option<String>,
config_path: Option<&Path>,
preprocessor_config_path: Option<&Path>,
tokenizer: DynTokenizer,
) -> Result<Option<Self>> {
let config = match config_path {
Some(path) => {
let text = fs::read_to_string(path)
.map_err(|error| multimodal!("failed to read config.json: {error}"))?;
serde_json::from_str(&text)
.map_err(|error| multimodal!("failed to parse config.json: {error}"))?
}
None => serde_json::Value::Object(Default::default()),
};
let preprocessor_config = match preprocessor_config_path {
Some(path) => {
let text = fs::read_to_string(path).map_err(|error| {
multimodal!("failed to read preprocessor_config.json: {error}")
})?;
PreProcessorConfig::from_json(&text).map_err(|error| {
multimodal!("failed to parse preprocessor_config.json: {error}")
})?
}
None => PreProcessorConfig::default(),
};
let context = MultimodalModelContext {
model_id,
model_type,
config,
tokenizer: TokenizerResolver(tokenizer),
};
let Some(spec) = context.resolve_model_spec() else {
warn!(
model_id = context.model_id,
model_type = context.model_type,
"multimodal model spec is not registered; disabling multimodal support for this model"
);
return Ok(None);
};
let spec = ResolvedMultimodalSpec::new(spec, &context)?;
let Some(image_processor) = context.resolve_image_processor() else {
warn!(
model_id = context.model_id,
model_type = context.model_type,
"image processor is not registered; disabling multimodal support for this model"
);
return Ok(None);
};
let media_connector = Arc::new(
MediaConnector::new(reqwest::Client::new(), MediaConnectorConfig::default())
.map_err(|error| multimodal!("{error}"))?,
);
Ok(Some(Self {
context,
spec,
image_processor: ResolvedImageProcessor {
raw: image_processor,
config: preprocessor_config,
},
media_connector,
}))
}
/// Return the template-visible placeholder token for this model.
///
/// The HF renderer uses this token while flattening image content in string
/// content format.
pub(crate) fn placeholder_token(&self) -> &str {
&self.spec.placeholder_token
}
}
/// Finalize a rendered chat prompt into text-generation input.
///
/// Text-only requests pass through unchanged as `Prompt::Text`. Multimodal
/// requests are tokenized in chat, their image placeholders are expanded, and
/// preprocessed image features are attached for engine-core transport.
pub(crate) async fn finalize_rendered_prompt(
request: &ChatRequest,
rendered: RenderedPrompt,
info: Option<&MultimodalModelInfo>,
model_dtype: Option<ModelDtype>,
) -> Result<(Prompt, Option<MmFeatures>)> {
if !request.has_multimodal() {
return Ok((rendered.prompt, None));
}
let info = info.ok_or(Error::UnsupportedMultimodalRenderer)?;
let Prompt::Text(prompt) = rendered.prompt else {
bail_multimodal!("multimodal chat renderer must return a text prompt before expansion");
};
let media_parts = extract_media_parts(request)?;
let model_dtype = model_dtype.unwrap_or_else(|| {
static WARN_ONCE: Once = Once::new();
WARN_ONCE.call_once(|| {
warn!(
"engine handshake did not report model dtype; \
falling back to float32 for multimodal tensor encoding"
);
});
ModelDtype::Float32
});
let mut prompt_token_ids = info
.context
.tokenizer()
.encode(&prompt, request.add_special_tokens)
.map_err(|error| multimodal!("{error}"))?;
let prepared = info.prepare_multimodal(media_parts, &mut prompt_token_ids, model_dtype).await?;
Ok((Prompt::TokenIds(prompt_token_ids), Some(prepared)))
}
/// Collect the image parts of a chat request, preserving message order and
/// the order of parts within each message.
///
/// Assistant messages are skipped because generated assistant blocks are
/// already represented as text for prompt rendering in this crate. Content
/// that is not a list of parts, and text parts, contribute nothing.
fn extract_media_parts(request: &ChatRequest) -> Result<Vec<MediaContentPart>> {
    let media: Vec<MediaContentPart> = request
        .messages
        .iter()
        .filter_map(|message| match message {
            ChatMessage::Assistant { .. } => None,
            ChatMessage::System { content }
            | ChatMessage::Developer { content, .. }
            | ChatMessage::User { content }
            | ChatMessage::ToolResponse { content, .. } => Some(content),
        })
        .filter_map(|content| match content {
            ChatContent::Parts(parts) => Some(parts),
            _ => None,
        })
        .flatten()
        .filter_map(|part| match part {
            ChatContentPart::Text { .. } => None,
            ChatContentPart::ImageUrl {
                image_url,
                detail,
                uuid,
            } => Some(MediaContentPart::ImageUrl {
                url: image_url.clone(),
                detail: *detail,
                uuid: uuid.clone(),
            }),
        })
        .collect();
    Ok(media)
}
impl MultimodalModelInfo {
/// Run media fetch, image preprocessing, prompt expansion, and feature
/// build.
///
/// `prompt_token_ids` is mutated in place because placeholder expansion
/// changes both the final prompt and the offsets recorded in
/// `PlaceholderRange`.
async fn prepare_multimodal(
&self,
media_parts: Vec<MediaContentPart>,
prompt_token_ids: &mut Vec<u32>,
model_dtype: ModelDtype,
) -> Result<MmFeatures> {
if media_parts.is_empty() {
return Ok(Vec::new());
}
let media_parts_len = media_parts.len();
let fetched = self.fetch_images(media_parts).await?;
let preprocessed = self.preprocess_images(&fetched.frames).await?;
let replacements = self.spec.prompt_replacements(&self.context, &preprocessed)?;
let ranges = self.expand_prompt_tokens(prompt_token_ids, replacements)?;
let features = self.build_features(preprocessed, fetched, ranges, model_dtype)?;
if features.len() != media_parts_len {
bail_multimodal!(
"number of built multimodal features {} does not match number of media parts {}",
features.len(),
media_parts_len
);
}
Ok(features)
}
/// Fetch all image parts and preserve their request-order UUID metadata.
async fn fetch_images(&self, media_parts: Vec<MediaContentPart>) -> Result<FetchedImageMedia> {
let mut tracker = AsyncMultiModalTracker::new(Arc::clone(&self.media_connector));
for part in media_parts {
tracker.push_part(part).map_err(|error| multimodal!("{error}"))?;
}
let tracker_output = tracker.finalize().await.map_err(|error| multimodal!("{error}"))?;
let images = tracker_output.data.get(&Modality::Image).cloned().unwrap_or_default();
let uuids = tracker_output.uuids.get(&Modality::Image).cloned().unwrap_or_default();
let frames = images
.into_iter()
.map(|media| match media {
TrackedMedia::Image(frame) => Ok(frame),
_ => Err(Error::UnsupportedMultimodalContent("non-image")),
})
.collect::<Result<Vec<_>>>()?;
Ok(FetchedImageMedia { frames, uuids })
}
/// Preprocess fetched image frames with the model's resolved image
/// processor.
///
/// The processor work is CPU-heavy relative to request wiring, so it runs
/// in a blocking task and returns owned tensors ready for wire
/// conversion.
async fn preprocess_images(
&self,
image_frames: &[Arc<llm_multimodal::ImageFrame>],
) -> Result<PreprocessedImages> {
let config = self.image_processor.config.clone();
let processor = self.image_processor.raw;
let images = image_frames.iter().map(|frame| frame.data().clone()).collect::<Vec<_>>();
tokio::task::spawn_blocking(move || {
processor.preprocess(&images, &config).map_err(|error| multimodal!("{error}"))
})
.await
.map_err(|error| multimodal!("image preprocessing task failed: {error}"))?
}
/// Replace rendered placeholder markers with model-specific replacement
/// tokens.
///
/// Replacements are consumed in order, matching the original media-part
/// order. The returned ranges point into the already-expanded prompt.
fn expand_prompt_tokens(
&self,
prompt_token_ids: &mut Vec<u32>,
replacements: Vec<PromptReplacement>,
) -> Result<Vec<PlaceholderRange>> {
let mut cursor = 0;
let mut ranges = Vec::with_capacity(replacements.len());
for replacement in replacements {
if replacement.modality != Modality::Image {
bail_multimodal!(
"unsupported prompt replacement modality `{}`",
replacement.modality
);
}
let offset = find_next_token(
prompt_token_ids,
self.spec.placeholder_marker_token_id,
cursor,
)
.ok_or_else(|| {
multimodal!(
"placeholder token `{}` was not found in tokenized prompt",
self.spec.placeholder_token
)
})?;
if replacement.tokens.is_empty() {
bail_multimodal!(
"placeholder token `{}` expanded to no tokens",
self.spec.placeholder_token
);
}
let replacement_len = replacement.tokens.len();
let replacement_tokens =
replacement.tokens.iter().map(|&token| token as u32).collect::<Vec<_>>();
let is_embed = {
let mask = replacement_tokens
.iter()
.map(|&token| token == self.spec.placeholder_embed_token_id)
.collect::<Vec<_>>();
WireTensor::from_bool(vec![replacement_len], mask).map_err(Error::Multimodal)?
};
prompt_token_ids.splice(offset..offset + 1, replacement_tokens);
ranges.push(PlaceholderRange {
offset,
length: replacement_len,
is_embed: Some(is_embed),
});
cursor = offset + replacement_len;
}
Ok(ranges)
}
/// Convert preprocessed image tensors into engine-core multimodal features.
///
/// One `MmFeatureSpec` is produced per image. Tensor fields are
/// sliced according to the model spec's field layout declarations.
fn build_features(
&self,
preprocessed: PreprocessedImages,
images: FetchedImageMedia,
ranges: Vec<PlaceholderRange>,
model_dtype: ModelDtype,
) -> Result<MmFeatures> {
let len = images.frames.len();
let tensors = tensor::collect_tensors(preprocessed, model_dtype)?;
let mut features = Vec::with_capacity(images.frames.len());
for (index, (frame, uuid, range)) in izip!(images.frames, images.uuids, ranges).enumerate()
{
let mut data = MmKwargsItem::new();
for (key, tensor) in &tensors {
let keep_on_cpu = self.spec.keep_on_cpu_keys.contains(key);
let (value, field) = match self.spec.field_layouts.get(key) {
Some(FieldLayout::Batched) => (
tensor.batched_value_at(index)?,
MmField::Batched(MmBatchedField { keep_on_cpu }),
),
Some(FieldLayout::Flat { sizes_key }) => {
let sizes = tensors.get(sizes_key).ok_or_else(|| {
multimodal!("flat tensor sizes key `{sizes_key}` is missing")
})?;
let (start, end) = tensor::flat_range_for_index(sizes, sizes_key, index)?;
(
tensor.flat_value_range(start, end)?,
MmField::Flat(MmFlatField {
slices: vec![MmSlice::Slice(SliceSpec {
start: Some(0),
stop: Some((end - start) as isize),
step: None,
})],
dim: 0,
keep_on_cpu,
}),
)
}
None => (
tensor.clone(),
MmField::Shared(MmSharedField {
batch_size: len,
keep_on_cpu,
}),
),
};
data.insert(
key.clone(),
MmFieldElem {
data: Some(value.try_into()?),
field,
},
);
}
let hash = frame.hash.clone();
features.push(MmFeatureSpec {
data: Some(data),
modality: "image".to_string(),
identifier: uuid.unwrap_or_else(|| hash.clone()),
mm_position: range,
mm_hash: Some(hash),
});
}
Ok(features)
}
}
/// Return the index of the first occurrence of `needle` in `haystack` at or
/// after position `start`.
///
/// Returns `None` when `start` lies beyond the end of `haystack` or when no
/// occurrence exists in the remaining tail. The search is deliberately
/// positional rather than a global replace: each image consumes the next
/// placeholder occurrence.
fn find_next_token(haystack: &[u32], needle: u32, start: usize) -> Option<usize> {
    let tail = haystack.get(start..)?;
    for (relative, &token) in tail.iter().enumerate() {
        if token == needle {
            return Some(start + relative);
        }
    }
    None
}
/// Adapter from the frontend tokenizer trait to `llm-multimodal`.
///
/// Wraps the shared tokenizer handle so it can be passed wherever
/// `llm-multimodal` expects a `TokenResolver`.
#[derive(Clone)]
struct TokenizerResolver(DynTokenizer);
impl TokenResolver for TokenizerResolver {
/// Look up the ID of `token` by delegating to the wrapped tokenizer.
fn token_to_id(&self, token: &str) -> Option<u32> {
self.0.token_to_id(token)
}
/// Look up the token string for `id` by delegating to the wrapped
/// tokenizer.
fn id_to_token(&self, id: u32) -> Option<String> {
self.0.id_to_token(id)
}
}
// Unit tests for placeholder expansion, driven by a stub tokenizer and
// hand-built Llama 4 style prompt replacements.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use llm_multimodal::TokenId;
use vllm_engine_core_client::protocol::tensor::WireArrayData;
use vllm_text::tokenizer::{IncrementalDecoder, Tokenizer, TokenizerError};
use super::*;
// Token IDs the stub tokenizer assigns to the Llama 4 image tokens.
const LLAMA4_IMAGE_START_ID: u32 = 200088;
const LLAMA4_IMAGE_END_ID: u32 = 200089;
const LLAMA4_IMAGE_ID: u32 = 200090;
const LLAMA4_PATCH_ID: u32 = 200092;
const LLAMA4_TILE_X_SEPARATOR_ID: u32 = 200093;
const LLAMA4_TILE_Y_SEPARATOR_ID: u32 = 200094;
/// Stub tokenizer that only knows the Llama 4 image tokens.
struct TestTokenizer;
impl Tokenizer for TestTokenizer {
/// Encodes `<|image|>` as its single token ID and any other text as one
/// token per byte.
fn encode(
&self,
text: &str,
_add_special_tokens: bool,
) -> std::result::Result<Vec<u32>, TokenizerError> {
Ok(match text {
"<|image|>" => vec![LLAMA4_IMAGE_ID],
text => text.bytes().map(u32::from).collect(),
})
}
/// Decoding always yields an empty string in this stub.
fn decode(
&self,
_token_ids: &[u32],
_skip_special_tokens: bool,
) -> std::result::Result<String, TokenizerError> {
Ok(String::new())
}
/// Maps the known image token strings to their IDs.
fn token_to_id(&self, token: &str) -> Option<u32> {
match token {
"<|image_start|>" => Some(LLAMA4_IMAGE_START_ID),
"<|image_end|>" => Some(LLAMA4_IMAGE_END_ID),
"<|image|>" => Some(LLAMA4_IMAGE_ID),
"<|patch|>" => Some(LLAMA4_PATCH_ID),
"<|tile_x_separator|>" => Some(LLAMA4_TILE_X_SEPARATOR_ID),
"<|tile_y_separator|>" => Some(LLAMA4_TILE_Y_SEPARATOR_ID),
_ => None,
}
}
/// Inverse of `token_to_id` for the known image tokens.
fn id_to_token(&self, id: u32) -> Option<String> {
match id {
LLAMA4_IMAGE_START_ID => Some("<|image_start|>".to_string()),
LLAMA4_IMAGE_END_ID => Some("<|image_end|>".to_string()),
LLAMA4_IMAGE_ID => Some("<|image|>".to_string()),
LLAMA4_PATCH_ID => Some("<|patch|>".to_string()),
LLAMA4_TILE_X_SEPARATOR_ID => Some("<|tile_x_separator|>".to_string()),
LLAMA4_TILE_Y_SEPARATOR_ID => Some("<|tile_y_separator|>".to_string()),
_ => None,
}
}
/// Streaming decode is not exercised by these tests.
fn create_decode_stream(
&self,
_prompt_token_ids: &[u32],
_skip_special_tokens: bool,
_min_bytes_to_buffer: usize,
) -> Box<dyn IncrementalDecoder + '_> {
unreachable!("not used")
}
}
/// Build a `MultimodalModelInfo` for `model_type` directly from registry
/// lookups, bypassing `from_paths` and any file access. Panics when the
/// spec or image processor is not registered.
fn test_info(model_type: &str, config: serde_json::Value) -> MultimodalModelInfo {
let context = MultimodalModelContext {
model_id: format!("{model_type}-test"),
model_type: Some(model_type.to_string()),
config,
tokenizer: TokenizerResolver(Arc::new(TestTokenizer)),
};
let spec = context
.resolve_model_spec()
.unwrap_or_else(|| panic!("{model_type} spec should match"));
let spec = ResolvedMultimodalSpec::new(spec, &context).unwrap();
let raw_image_processor = context
.resolve_image_processor()
.unwrap_or_else(|| panic!("{model_type} image processor should match"));
let media_connector = Arc::new(
MediaConnector::new(reqwest::Client::new(), MediaConnectorConfig::default()).unwrap(),
);
MultimodalModelInfo {
context,
spec,
image_processor: ResolvedImageProcessor {
raw: raw_image_processor,
config: PreProcessorConfig::default(),
},
media_connector,
}
}
/// Model info for a minimal Llama 4 config whose `image_token_index` is the
/// patch token ID.
fn llama4_info() -> MultimodalModelInfo {
let config = serde_json::json!({
"model_type": "llama4",
"image_token_index": LLAMA4_PATCH_ID,
"vision_config": {"image_size": 336, "patch_size": 14}
});
test_info("llama4", config)
}
/// Five-token replacement: start, image marker, two patches, end.
fn llama4_single_tile_replacement() -> PromptReplacement {
PromptReplacement::sequence(
Modality::Image,
"<|image|>",
vec![
LLAMA4_IMAGE_START_ID as TokenId,
LLAMA4_IMAGE_ID as TokenId,
LLAMA4_PATCH_ID as TokenId,
LLAMA4_PATCH_ID as TokenId,
LLAMA4_IMAGE_END_ID as TokenId,
],
)
}
/// Eight-token replacement that interleaves patch tokens with tile
/// separators and an image marker.
fn llama4_multi_tile_replacement() -> PromptReplacement {
PromptReplacement::sequence(
Modality::Image,
"<|image|>",
vec![
LLAMA4_IMAGE_START_ID as TokenId,
LLAMA4_PATCH_ID as TokenId,
LLAMA4_TILE_X_SEPARATOR_ID as TokenId,
LLAMA4_PATCH_ID as TokenId,
LLAMA4_TILE_Y_SEPARATOR_ID as TokenId,
LLAMA4_IMAGE_ID as TokenId,
LLAMA4_PATCH_ID as TokenId,
LLAMA4_IMAGE_END_ID as TokenId,
],
)
}
/// Assert that the range carries a bool `is_embed` tensor whose shape and
/// raw bytes match `expected`.
fn assert_bool_mask(range: &PlaceholderRange, expected: &[bool]) {
let tensor = range.is_embed.as_ref().expect("is_embed mask");
assert_eq!(tensor.dtype, "bool");
assert_eq!(tensor.shape, vec![expected.len()]);
assert_eq!(
tensor.data,
WireArrayData::RawView(expected.iter().map(|value| u8::from(*value)).collect())
);
}
// Only patch tokens are flagged in the embed mask; start/end, separator,
// and image-marker tokens are not.
#[test]
fn expand_prompt_tokens_marks_only_llama4_patch_tokens_as_embed() {
let info = llama4_info();
let mut prompt_token_ids = vec![1, LLAMA4_IMAGE_ID, 2];
let replacements = vec![llama4_multi_tile_replacement()];
let ranges = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap();
assert_eq!(
prompt_token_ids,
vec![
1,
LLAMA4_IMAGE_START_ID,
LLAMA4_PATCH_ID,
LLAMA4_TILE_X_SEPARATOR_ID,
LLAMA4_PATCH_ID,
LLAMA4_TILE_Y_SEPARATOR_ID,
LLAMA4_IMAGE_ID,
LLAMA4_PATCH_ID,
LLAMA4_IMAGE_END_ID,
2,
]
);
assert_eq!(ranges[0].offset, 1);
assert_eq!(ranges[0].length, 8);
assert_bool_mask(
&ranges[0],
&[false, true, false, true, false, false, true, false],
);
}
// A prompt without the placeholder marker must produce a "not found" error.
#[test]
fn expand_prompt_tokens_errors_when_placeholder_missing() {
let info = llama4_info();
let mut prompt_token_ids = vec![1, 2, 3];
let replacements = vec![llama4_single_tile_replacement()];
let error = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap_err();
assert!(matches!(error, Error::Multimodal(message) if message.contains("not found")));
}
// The replacement itself contains the image marker ID; the second
// replacement must be applied to the second original marker, not to the
// marker inserted by the first expansion.
#[test]
fn expand_prompt_tokens_skips_llama4_image_marker_inside_replacement() {
let info = llama4_info();
let mut prompt_token_ids = vec![1, LLAMA4_IMAGE_ID, 2, LLAMA4_IMAGE_ID, 3];
let replacements = vec![
llama4_single_tile_replacement(),
llama4_single_tile_replacement(),
];
let ranges = info.expand_prompt_tokens(&mut prompt_token_ids, replacements).unwrap();
assert_eq!(
prompt_token_ids,
vec![
1,
LLAMA4_IMAGE_START_ID,
LLAMA4_IMAGE_ID,
LLAMA4_PATCH_ID,
LLAMA4_PATCH_ID,
LLAMA4_IMAGE_END_ID,
2,
LLAMA4_IMAGE_START_ID,
LLAMA4_IMAGE_ID,
LLAMA4_PATCH_ID,
LLAMA4_PATCH_ID,
LLAMA4_IMAGE_END_ID,
3,
]
);
assert_eq!(ranges[0].offset, 1);
assert_eq!(ranges[0].length, 5);
assert_bool_mask(&ranges[0], &[false, false, true, true, false]);
assert_eq!(ranges[1].offset, 7);
assert_eq!(ranges[1].length, 5);
assert_bool_mask(&ranges[1], &[false, false, true, true, false]);
}
}
+342
View File
@@ -0,0 +1,342 @@
use std::collections::HashMap;
use half::{bf16, f16};
use llm_multimodal::{ModelSpecificValue, PreprocessedImages};
use vllm_engine_core_client::protocol::ModelDtype;
use vllm_engine_core_client::protocol::multimodal::MmKwargValue as ProtocolKwargValue;
use vllm_engine_core_client::protocol::tensor::{ShapeExt as _, WireTensor};
use crate::error::{Error, Result, bail_multimodal, multimodal};
/// Intermediate representation of one multimodal kwarg value.
///
/// Tensor variants keep typed, row-major flat data next to their shape so
/// they can be sliced per image before being converted into the wire-level
/// `ProtocolKwargValue`.
#[derive(Debug, Clone)]
pub(super) enum KwargValue {
/// Float tensor with row-major flat data and shape.
F32Tensor { data: Vec<f32>, shape: Vec<usize> },
/// Float16 tensor with row-major flat data and shape.
F16Tensor { data: Vec<f16>, shape: Vec<usize> },
/// BFloat16 tensor with row-major flat data and shape.
Bf16Tensor { data: Vec<bf16>, shape: Vec<usize> },
/// Signed integer tensor with row-major flat data and shape.
I64Tensor { data: Vec<i64>, shape: Vec<usize> },
/// Unsigned integer tensor with row-major flat data and shape.
U32Tensor { data: Vec<u32>, shape: Vec<usize> },
/// Non-tensor kwarg value that is shared or copied as-is; slicing helpers
/// return it unchanged.
Passthrough(ProtocolKwargValue),
}
/// Gather `pixel_values` and all model-specific preprocessor outputs into a
/// single map keyed by kwarg name.
///
/// Float tensors are converted to `float_dtype`. Model-specific entries are
/// inserted after `pixel_values`, so an entry with that same key replaces it.
pub(super) fn collect_tensors(
    preprocessed: PreprocessedImages,
    float_dtype: ModelDtype,
) -> Result<HashMap<String, KwargValue>> {
    let PreprocessedImages {
        pixel_values,
        model_specific,
        ..
    } = preprocessed;

    // Capture the shape before the array is consumed into flat data.
    let pixel_shape = pixel_values.shape().to_vec();
    let pixel_data: Vec<f32> = pixel_values.into_iter().collect();
    let pixel_tensor = KwargValue::from_f32_tensor(pixel_data, pixel_shape, float_dtype)?;

    let mut collected = HashMap::from([("pixel_values".to_string(), pixel_tensor)]);
    for (name, raw_value) in model_specific {
        let converted = KwargValue::from_model_specific(raw_value, float_dtype)?;
        collected.insert(name, converted);
    }
    Ok(collected)
}
impl KwargValue {
fn from_model_specific(value: ModelSpecificValue, float_dtype: ModelDtype) -> Result<Self> {
use ProtocolKwargValue::*;
Ok(match value {
ModelSpecificValue::Tensor { data, shape } => {
Self::from_f32_tensor(data, shape, float_dtype)?
}
ModelSpecificValue::IntTensor { data, shape } => Self::I64Tensor { data, shape },
ModelSpecificValue::UintTensor { data, shape } => Self::U32Tensor { data, shape },
ModelSpecificValue::Int(value) => Self::Passthrough(Int(value)),
ModelSpecificValue::Float(value) => Self::Passthrough(Float(value)),
ModelSpecificValue::IntVec(values) => {
Self::Passthrough(List(values.into_iter().map(Int).collect()))
}
ModelSpecificValue::UintVec(values) => Self::Passthrough(List(
values.into_iter().map(|value| Int(value as i64)).collect(),
)),
ModelSpecificValue::FloatVec(values) => Self::Passthrough(List(
values.into_iter().map(|value| Float(value as f64)).collect(),
)),
ModelSpecificValue::TupleVec(values) => Self::Passthrough(List(
values
.into_iter()
.map(|(height, width)| List(vec![Int(height as i64), Int(width as i64)]))
.collect(),
)),
ModelSpecificValue::Bool(value) => Self::Passthrough(Int(i64::from(value))),
})
}
/// Convert a float tensor to the target float dtype if needed, keeping the
/// same shape.
fn from_f32_tensor(data: Vec<f32>, shape: Vec<usize>, float_dtype: ModelDtype) -> Result<Self> {
match float_dtype {
ModelDtype::Float16 => Ok(Self::F16Tensor {
data: data.into_iter().map(f16::from_f32).collect(),
shape,
}),
ModelDtype::BFloat16 => Ok(Self::Bf16Tensor {
data: data.into_iter().map(bf16::from_f32).collect(),
shape,
}),
ModelDtype::Float32 => Ok(Self::F32Tensor { data, shape }),
}
}
}
impl TryFrom<KwargValue> for ProtocolKwargValue {
type Error = Error;
fn try_from(value: KwargValue) -> Result<Self> {
match value {
KwargValue::F32Tensor { data, shape } => Ok(Self::Tensor(
WireTensor::from_f32(shape, data).map_err(Error::Multimodal)?,
)),
KwargValue::F16Tensor { data, shape } => Ok(Self::Tensor(
WireTensor::from_f16(shape, data).map_err(Error::Multimodal)?,
)),
KwargValue::Bf16Tensor { data, shape } => Ok(Self::Tensor(
WireTensor::from_bf16(shape, data).map_err(Error::Multimodal)?,
)),
KwargValue::I64Tensor { data, shape } => Ok(Self::Tensor(
WireTensor::from_i64(shape, data).map_err(Error::Multimodal)?,
)),
KwargValue::U32Tensor { data, shape } => Ok(Self::Tensor(
WireTensor::from_u32(shape, data).map_err(Error::Multimodal)?,
)),
KwargValue::Passthrough(value) => Ok(value),
}
}
}
impl KwargValue {
    /// Extract one image from a batched tensor field.
    ///
    /// Batched fields use their first axis as image index and drop that axis in
    /// the per-feature value, matching vLLM's batched-field semantics.
    /// Passthrough values are returned as a clone.
    pub(super) fn batched_value_at(&self, index: usize) -> Result<Self> {
        self.slice_first_axis(index, index + 1, true)
    }

    /// Extract one image's variable-length range from a flat tensor field.
    ///
    /// Flat fields keep the first axis as the sliced length for this image.
    /// Passthrough values are returned as a clone.
    pub(super) fn flat_value_range(&self, start: usize, end: usize) -> Result<Self> {
        self.slice_first_axis(start, end, false)
    }

    /// Slice `start..end` along the first axis of whichever tensor variant
    /// this is, forwarding `drop_axis` to `slice_first_axis_range`, and
    /// rebuild the same variant from the result.
    fn slice_first_axis(&self, start: usize, end: usize, drop_axis: bool) -> Result<Self> {
        let sliced = match self {
            Self::F32Tensor { data, shape } => {
                let (shape, data) = slice_first_axis_range(shape, data, start, end, drop_axis)?;
                Self::F32Tensor { data, shape }
            }
            Self::F16Tensor { data, shape } => {
                let (shape, data) = slice_first_axis_range(shape, data, start, end, drop_axis)?;
                Self::F16Tensor { data, shape }
            }
            Self::Bf16Tensor { data, shape } => {
                let (shape, data) = slice_first_axis_range(shape, data, start, end, drop_axis)?;
                Self::Bf16Tensor { data, shape }
            }
            Self::I64Tensor { data, shape } => {
                let (shape, data) = slice_first_axis_range(shape, data, start, end, drop_axis)?;
                Self::I64Tensor { data, shape }
            }
            Self::U32Tensor { data, shape } => {
                let (shape, data) = slice_first_axis_range(shape, data, start, end, drop_axis)?;
                Self::U32Tensor { data, shape }
            }
            Self::Passthrough(value) => Self::Passthrough(value.clone()),
        };
        Ok(sliced)
    }
}
/// Compute the first-axis range for one image in a flat tensor.
///
/// `sizes` is the companion tensor named by `sizes_key`. Entry `i` is the
/// number of first-axis rows belonging to image `i` (a per-image size, not a
/// running total). The range for image `index` therefore starts at the sum of
/// all earlier entries and spans its own entry.
///
/// Returns an error when `sizes` is not an int64/uint32 tensor, contains a
/// value that does not fit `usize`, has no entry for `index`, or when the
/// accumulated offsets overflow `usize`. `sizes_key` is only used in error
/// messages.
pub(super) fn flat_range_for_index(
    sizes: &KwargValue,
    sizes_key: &str,
    index: usize,
) -> Result<(usize, usize)> {
    let sizes = tensor_as_usize_vec(sizes)?;
    let size = *sizes.get(index).ok_or_else(|| {
        multimodal!("flat tensor sizes key `{sizes_key}` has no entry for image {index}")
    })?;
    // Sizes come from tensor data, so accumulate with checked arithmetic
    // rather than trusting that the total fits in `usize`.
    let start = sizes[..index]
        .iter()
        .try_fold(0usize, |offset, earlier| offset.checked_add(*earlier));
    let range = start.and_then(|start| Some((start, start.checked_add(size)?)));
    range.ok_or_else(|| {
        multimodal!("flat tensor sizes key `{sizes_key}` overflows at image {index}")
    })
}
/// Interpret an integer tensor as a list of per-image sizes for flat slicing.
///
/// Accepts `int64` tensors (every entry must convert to `usize`) and `uint32`
/// tensors; any other value is rejected. The tensor's shape is ignored and
/// only its flat data is read.
fn tensor_as_usize_vec(tensor: &KwargValue) -> Result<Vec<usize>> {
    match tensor {
        KwargValue::U32Tensor { data, .. } => {
            Ok(data.iter().map(|&value| value as usize).collect())
        }
        KwargValue::I64Tensor { data, .. } => {
            let mut sizes = Vec::with_capacity(data.len());
            for value in data {
                // Stop at the first entry that cannot be a size.
                let Ok(size) = usize::try_from(*value) else {
                    return Err(multimodal!("negative flat tensor size `{value}`"));
                };
                sizes.push(size);
            }
            Ok(sizes)
        }
        _ => Err(multimodal!("flat tensor sizes must be int64 or uint32")),
    }
}
/// Slice a flat row-major tensor along its first axis.
///
/// `data` must hold exactly as many elements as `shape` describes. Rows
/// `start..end` of the first axis are copied out.
///
/// * With `drop_axis`, the first axis is removed from the returned shape. That
///   only describes the returned data when exactly one row is selected, so any
///   other range length is rejected.
/// * Without `drop_axis`, the first axis is kept with length `end - start`.
///
/// Returns an error when the shape is empty, the range is invalid for the
/// first dimension, the element count of `shape` cannot be computed, or
/// `data` has the wrong length.
fn slice_first_axis_range<T: Clone>(
    shape: &[usize],
    data: &[T],
    start: usize,
    end: usize,
    drop_axis: bool,
) -> Result<(Vec<usize>, Vec<T>)> {
    let first_dim = *shape.first().ok_or_else(|| multimodal!("tensor has no first dimension"))?;
    if start > end || end > first_dim {
        bail_multimodal!("invalid tensor slice {start}..{end} for first dimension {first_dim}");
    }
    // `start <= end` was established above, so the subtraction cannot wrap.
    if drop_axis && end - start != 1 {
        bail_multimodal!(
            "cannot drop first axis of tensor slice {start}..{end}: expected exactly one row"
        );
    }
    let expected_len = shape
        .checked_numel()
        .ok_or_else(|| multimodal!("tensor shape {shape:?} has too many elements"))?;
    if expected_len != data.len() {
        bail_multimodal!(
            "tensor shape {shape:?} expects {expected_len} elements, got {}",
            data.len()
        );
    }
    // Number of elements in one first-axis row.
    let stride = shape[1..].iter().product::<usize>();
    let data_start = start * stride;
    let data_end = end * stride;
    let out_shape = if drop_axis {
        shape[1..].to_vec()
    } else {
        let mut shape = shape.to_vec();
        shape[0] = end - start;
        shape
    };
    Ok((out_shape, data[data_start..data_end].to_vec()))
}
// Unit tests for first-axis slicing of kwarg tensors and for the float dtype
// conversion on the way to the wire format.
#[cfg(test)]
mod tests {
use super::*;
// Selecting image 1 of a [2, 2] tensor yields the second row, and the image
// axis is removed from the shape.
#[test]
fn batched_value_at_drops_first_axis() {
let value = KwargValue::F32Tensor {
data: vec![1.0, 2.0, 3.0, 4.0],
shape: vec![2, 2],
};
let value = value.batched_value_at(1).unwrap();
assert!(matches!(
value,
KwargValue::F32Tensor { data, shape }
if shape == vec![2] && data == vec![3.0, 4.0]
));
}
// Rows 1..3 of a [5, 2] tensor are returned with the first axis kept and
// resized to the range length.
#[test]
fn flat_value_range_keeps_first_axis() {
let value = KwargValue::U32Tensor {
data: (0..10).collect(),
shape: vec![5, 2],
};
let value = value.flat_value_range(1, 3).unwrap();
assert!(matches!(
value,
KwargValue::U32Tensor { data, shape }
if shape == vec![2, 2] && data == vec![2, 3, 4, 5]
));
}
// With per-image sizes [2, 3, 4], image 1 starts after the first 2 rows and
// spans 3 rows.
#[test]
fn flat_range_for_index_uses_size_tensor() {
let sizes = KwargValue::I64Tensor {
data: vec![2, 3, 4],
shape: vec![3],
};
assert_eq!(
flat_range_for_index(&sizes, "image_grid_thw", 1).unwrap(),
(2, 5)
);
}
// A [2, 2] shape with only three data elements must be rejected before any
// slicing happens.
#[test]
fn slice_first_axis_range_errors_on_shape_data_mismatch() {
let error = slice_first_axis_range(&[2, 2], &[1.0_f32, 2.0, 3.0], 0, 1, true).unwrap_err();
assert!(
matches!(error, Error::Multimodal(message) if message.contains("expects 4 elements"))
);
}
// Converting f32 data to bfloat16 must reach the wire tensor with the
// "bfloat16" dtype tag and the original shape.
#[test]
fn bfloat16_tensor_wire_uses_bfloat16_dtype() {
let value =
KwargValue::from_f32_tensor(vec![1.0, -1.0], vec![2], ModelDtype::BFloat16).unwrap();
let ProtocolKwargValue::Tensor(tensor) = ProtocolKwargValue::try_from(value).unwrap()
else {
panic!("expected tensor");
};
assert_eq!(tensor.dtype, "bfloat16");
assert_eq!(tensor.shape, vec![2]);
// Presumably the raw byte length: 2 elements x 2 bytes each.
assert_eq!(tensor.data.into_raw_view().unwrap().len(), 4);
}
// Same check as above for the float16 dtype.
#[test]
fn float16_tensor_wire_uses_float16_dtype() {
let value =
KwargValue::from_f32_tensor(vec![1.0, -1.0], vec![2], ModelDtype::Float16).unwrap();
let ProtocolKwargValue::Tensor(tensor) = ProtocolKwargValue::try_from(value).unwrap()
else {
panic!("expected tensor");
};
assert_eq!(tensor.dtype, "float16");
assert_eq!(tensor.shape, vec![2]);
// Presumably the raw byte length: 2 elements x 2 bytes each.
assert_eq!(tensor.data.into_raw_view().unwrap().len(), 4);
}
}
+166
View File
@@ -0,0 +1,166 @@
//! Default output processing pipeline.
mod reasoning;
mod tool;
use std::sync::Once;
use futures::{Stream, StreamExt as _};
use tracing::info;
use trait_set::trait_set;
use vllm_text::tokenizer::DynTokenizer;
use self::reasoning::reasoning_event_stream;
use self::tool::tool_event_stream;
use super::structured::structured_chat_event_stream;
use crate::error::Result;
use crate::output::{
AssistantEvent, ChatOutputProcessor, ContentEvent, DynChatEventStream,
DynDecodedTextEventStream,
};
use crate::parser::ParserSelection;
use crate::parser::reasoning::{ReasoningParser, ReasoningParserFactory};
use crate::parser::tool::{ToolParser, ToolParserFactory};
use crate::request::{ChatRequest, ChatToolChoice};
use crate::{Error, Result as ChatResult};
// Trait alias for a stream of `Result<ContentEvent>` items that is `Send` and
// `'static`, as spelled out on the right-hand side below.
trait_set! {
trait ContentEventStream = Stream<Item = Result<ContentEvent>> + Send + 'static;
}
/// Default request-scoped output processor used by Hugging Face style chat
/// backends.
///
/// This implementation assumes the backend already emitted decoded text deltas,
/// then optionally layers reasoning parsing and tool-call parsing before
/// assembling final structured chat events.
pub struct DefaultChatOutputProcessor {
/// Reasoning parser for this request; `None` means no reasoning separation
/// is applied.
reasoning_parser: Option<Box<dyn ReasoningParser>>,
/// Tool-call parser for this request; `None` means no tool-call parsing is
/// applied.
tool_parser: Option<Box<dyn ToolParser>>,
}
impl DefaultChatOutputProcessor {
/// Build the default output processor and apply any parser-specific request
/// adjustments.
///
/// Parser resolution happens here so that request validation, prompt
/// rendering, and streaming all observe the same parser-adjusted
/// request state.
pub fn new(
request: &mut ChatRequest,
model_id: &str,
tokenizer: DynTokenizer,
tool_call_parser: &ParserSelection,
reasoning_parser: &ParserSelection,
) -> ChatResult<Self> {
let tool_parsing_enabled =
matches!(request.tool_choice, ChatToolChoice::Auto) && !request.tools.is_empty();
let tool_parser = if tool_parsing_enabled {
Some(Self::resolve_tool_parser(
request,
model_id,
tool_call_parser,
)?)
} else {
None
};
let reasoning_parser = Self::resolve_optional_reasoning_parser(
request,
model_id,
tokenizer,
reasoning_parser,
)?;
Ok(Self {
reasoning_parser,
tool_parser,
})
}
/// Build the plain-text-only default output processor.
///
/// This keeps the default structured chat-event assembly but disables both
/// reasoning parsing and tool-call parsing completely, so that all
/// content is treated as opaque text.
pub fn plain_text_only() -> Self {
Self {
reasoning_parser: None,
tool_parser: None,
}
}
fn resolve_tool_parser(
request: &mut ChatRequest,
model_id: &str,
selection: &ParserSelection,
) -> ChatResult<Box<dyn ToolParser>> {
let factory = ToolParserFactory::global();
let parser_name = match selection {
ParserSelection::Auto => factory.resolve_name_for_model(model_id).ok_or_else(|| {
Error::ParserUnavailableForModel {
kind: "tool",
model_id: model_id.to_string(),
}
})?,
ParserSelection::None => return Err(Error::ParserDisabled { kind: "tool" }),
ParserSelection::Explicit(name) => name.as_str(),
};
let parser = factory.create(parser_name, &request.tools)?;
if parser.preserve_special_tokens() {
request.decode_options.skip_special_tokens = false;
}
TOOL_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using tool parser"));
Ok(parser)
}
fn resolve_optional_reasoning_parser(
request: &mut ChatRequest,
model_id: &str,
tokenizer: DynTokenizer,
selection: &ParserSelection,
) -> ChatResult<Option<Box<dyn ReasoningParser>>> {
let factory = ReasoningParserFactory::global();
let parser_name = match selection {
ParserSelection::Auto => factory.resolve_name_for_model(model_id),
ParserSelection::None => None,
ParserSelection::Explicit(name) => Some(name.as_str()),
};
let Some(parser_name) = parser_name else {
REASONING_PARSER_LOG_ONCE.call_once(|| info!("reasoning parsing disabled"));
return Ok(None);
};
let parser = factory.create(parser_name, tokenizer)?;
if parser.preserve_special_tokens() {
request.decode_options.skip_special_tokens = false;
}
REASONING_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using reasoning parser"));
Ok(Some(parser))
}
}
// Guards so the parser-selection log lines are emitted at most once per
// process instead of once per request.
static TOOL_PARSER_LOG_ONCE: Once = Once::new();
// Shared by the "reasoning parsing disabled" and "using reasoning parser"
// messages, so only whichever is reached first gets logged.
static REASONING_PARSER_LOG_ONCE: Once = Once::new();
impl ChatOutputProcessor for DefaultChatOutputProcessor {
    /// Turns the already-decoded text event stream into structured chat
    /// events by chaining three stages in order:
    ///
    /// 1. [`reasoning_event_stream`] — reasoning/content separation
    /// 2. [`tool_event_stream`] — tool-call parsing
    /// 3. [`structured_chat_event_stream`] — final block assembly
    ///
    /// The processor is consumed; each parser is handed to its stage.
    fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream> {
        let Self {
            reasoning_parser,
            tool_parser,
        } = *self;
        let content_events = reasoning_event_stream(decoded, reasoning_parser);
        let assistant_events = tool_event_stream(content_events, tool_parser);
        Ok(structured_chat_event_stream(assistant_events).boxed())
    }
}
@@ -0,0 +1,504 @@
//! Adapts decoded text updates into reasoning-aware assistant deltas.
//!
//! This stage sits between low-level token decoding and final block assembly.
//! It is the only place in the new pipeline that understands reasoning
//! separation: `decoded.rs` still only produces plain text deltas, while later
//! stages consume the semantic `Text` / `Reasoning` split emitted here.
use asynk_strim_attr::{TryYielder, try_stream};
use futures::{StreamExt as _, pin_mut};
use thiserror_ext::AsReport;
use tracing::warn;
use vllm_text::output::DecodedTextEvent;
use super::ContentEvent;
use crate::Result;
use crate::error::Error;
use crate::event::AssistantBlockKind;
use crate::output::DecodedTextEventStream;
use crate::parser::reasoning::{ReasoningDelta, ReasoningParser};
/// Per-stream reasoning parsing state.
///
/// One instance is created per streamed response and lives until that stream
/// ends.
struct ReasoningState {
/// Reasoning parser for the current model family.
parser: Box<dyn ReasoningParser>,
/// Whether reasoning parsing has already failed for this stream.
///
/// Once set, the parser is no longer called and later deltas are forwarded
/// as plain text.
parser_failed: bool,
}
impl ReasoningState {
    /// Start tracking a new streamed response with `parser` in its healthy
    /// state.
    fn new(parser: Box<dyn ReasoningParser>) -> Self {
        Self {
            parser_failed: false,
            parser,
        }
    }

    /// Turn one decoded text delta into zero or more content events.
    ///
    /// While the parser is healthy its output decides how the delta is split
    /// into reasoning and text. On the first parser error a warning is logged,
    /// the stream switches permanently to plain-text mode, and the failing
    /// delta itself is emitted as text.
    fn process_delta(&mut self, delta: String) -> Vec<ContentEvent> {
        let mut events = Vec::new();

        // After a failure the delta is forwarded as-is, even when empty.
        if self.parser_failed {
            events.push(ContentEvent::TextDelta {
                kind: AssistantBlockKind::Text,
                delta,
            });
            return events;
        }

        match self.parser.push(&delta) {
            Ok(parsed) => push_reasoning_delta(&mut events, parsed),
            Err(error) => {
                // NOTE(review): only the current delta is replayed; anything
                // the parser was still holding from earlier pushes is not
                // recovered here.
                warn!(
                    error = %error.as_report(),
                    "reasoning parser failed; falling back to plain text deltas"
                );
                self.parser_failed = true;
                push_text_delta(&mut events, AssistantBlockKind::Text, delta);
            }
        }
        events
    }

    /// Hand the prompt token IDs to the parser once they are known.
    ///
    /// An initialization error is logged and puts the stream into plain-text
    /// mode; it is never propagated.
    fn initialize(&mut self, prompt_token_ids: &[u32]) {
        if self.parser_failed {
            return;
        }
        if let Err(error) = self.parser.initialize(prompt_token_ids) {
            warn!(
                error = %error.as_report(),
                "failed to initialize reasoning parser; falling back to plain text deltas"
            );
            self.parser_failed = true;
        }
    }

    /// Flush whatever the parser is still holding at end of stream.
    ///
    /// Returns nothing when the parser has failed earlier or when flushing
    /// itself fails (the latter is logged).
    fn finish(&mut self) -> Vec<ContentEvent> {
        let mut events = Vec::new();
        if !self.parser_failed {
            match self.parser.finish() {
                Ok(flushed) => push_reasoning_delta(&mut events, flushed),
                Err(error) => {
                    warn!(error = %error.as_report(), "failed to flush reasoning parser state");
                }
            }
        }
        events
    }
}
/// Append a text delta of the given kind to `events`, skipping empty deltas.
fn push_text_delta(events: &mut Vec<ContentEvent>, kind: AssistantBlockKind, delta: String) {
    if !delta.is_empty() {
        events.push(ContentEvent::TextDelta { kind, delta });
    }
}
/// Convert one parsed reasoning delta into zero or more content events.
fn push_reasoning_delta(events: &mut Vec<ContentEvent>, delta: ReasoningDelta) {
if let Some(reasoning) = delta.reasoning {
push_text_delta(events, AssistantBlockKind::Reasoning, reasoning);
}
if let Some(content) = delta.content {
push_text_delta(events, AssistantBlockKind::Text, content);
}
}
/// Wrap one decoded-text stream into the internal reasoning event stream.
///
/// Without a parser, every decoded event is converted with
/// `ContentEvent::from_decoded_plain_text` and forwarded. With a parser, the
/// parser is initialized from the prompt token IDs on `Start`, each text delta
/// is split into reasoning/text events, and the parser is flushed when a delta
/// carries `finished`.
///
/// Errors from the input stream end this stream with that error; parser
/// errors are handled inside `ReasoningState` and never surface here.
#[try_stream]
pub(crate) async fn reasoning_event_stream(
decoded_stream: impl DecodedTextEventStream,
reasoning_parser: Option<Box<dyn ReasoningParser>>,
mut y: TryYielder<ContentEvent, Error>,
) -> Result<()> {
pin_mut!(decoded_stream);
// Without a parser, pass through as plain text deltas.
let Some(reasoning_parser) = reasoning_parser else {
while let Some(event) = decoded_stream.next().await.transpose()? {
for next in ContentEvent::from_decoded_plain_text(event) {
y.yield_ok(next).await;
}
}
return Ok(());
};
let mut state = ReasoningState::new(reasoning_parser);
while let Some(event) = decoded_stream.next().await.transpose()? {
match event {
DecodedTextEvent::Start {
prompt_token_ids,
prompt_logprobs,
} => {
// Let the parser see the prompt before the start event is forwarded.
state.initialize(&prompt_token_ids);
y.yield_ok(ContentEvent::Start {
prompt_token_ids,
prompt_logprobs,
})
.await;
}
DecodedTextEvent::TextDelta {
delta,
token_ids,
logprobs,
finished,
} => {
// Per delta the order is: parsed text events, then the logprobs
// delta, then (on the final delta) flushed parser output and `Done`.
for next in state.process_delta(delta) {
y.yield_ok(next).await;
}
// Skip the logprobs event when there is nothing to report.
if logprobs.is_some() || !token_ids.is_empty() {
y.yield_ok(ContentEvent::LogprobsDelta {
logprobs,
token_ids,
})
.await;
}
if let Some(finished) = finished {
for next in state.finish() {
y.yield_ok(next).await;
}
y.yield_ok(ContentEvent::Done {
prompt_token_count: finished.prompt_token_count,
output_token_count: finished.output_token_count,
finish_reason: finished.finish_reason,
kv_transfer_params: finished.kv_transfer_params,
})
.await;
}
}
}
}
Ok(())
}
// Tests for the reasoning stage: parser-failure fallback, logprobs
// forwarding, and the Qwen3 parser's handling of prompt-side think markers.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use futures::{StreamExt as _, stream};
use vllm_llm::FinishReason;
use vllm_text::output::{
DecodedLogprobs, DecodedPositionLogprobs, DecodedTextEvent, DecodedTokenLogprob,
};
use vllm_tokenizer::{DynTokenizer, Tokenizer};
use super::super::ContentEvent;
use super::reasoning_event_stream;
use crate::event::AssistantBlockKind;
use crate::parser::reasoning::{
ReasoningDelta, ReasoningError, ReasoningParser, ReasoningParserFactory, names,
};
// Minimal tokenizer: one token per char, with fixed IDs for the two think
// markers (`<think>` = 1, `</think>` = 2).
struct FakeTokenizer;
impl Tokenizer for FakeTokenizer {
fn encode(
&self,
text: &str,
_add_special_tokens: bool,
) -> vllm_tokenizer::Result<Vec<u32>> {
Ok(text.chars().map(u32::from).collect())
}
fn decode(
&self,
token_ids: &[u32],
_skip_special_tokens: bool,
) -> vllm_tokenizer::Result<String> {
Ok(token_ids
.iter()
.map(|token_id| char::from_u32(*token_id).unwrap_or('\u{FFFD}'))
.collect())
}
fn token_to_id(&self, token: &str) -> Option<u32> {
match token {
"<think>" => Some(1),
"</think>" => Some(2),
_ => None,
}
}
}
// Parser whose first `push` fails and whose later pushes return an empty
// delta.
struct FailingReasoningParser {
fail_next: bool,
}
impl ReasoningParser for FailingReasoningParser {
fn create(_tokenizer: DynTokenizer) -> Result<Box<dyn ReasoningParser>, ReasoningError>
where
Self: Sized + 'static,
{
Ok(Box::new(Self { fail_next: true }))
}
fn push(&mut self, _text: &str) -> Result<ReasoningDelta, ReasoningError> {
if self.fail_next {
self.fail_next = false;
return Err(ReasoningError::MissingToken {
token: "<think>".to_string(),
});
}
Ok(ReasoningDelta::default())
}
}
// Registers the failing parser under the name "failing" and creates one.
fn test_reasoning_parser(factory: &mut ReasoningParserFactory) -> Box<dyn ReasoningParser> {
factory.register_parser::<FailingReasoningParser>("failing");
factory.create("failing", Arc::new(FakeTokenizer)).unwrap()
}
// After the parser fails on the first delta, both deltas must still come out
// as plain text and the stream must finish normally.
#[tokio::test]
async fn reasoning_parser_failure_falls_back_to_plain_text() {
let mut factory = ReasoningParserFactory::new();
let events = stream::iter(vec![
Ok(DecodedTextEvent::Start {
prompt_token_ids: vec![1, 2, 3].into(),
prompt_logprobs: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "abc".to_string(),
token_ids: vec![],
logprobs: None,
finished: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "def".to_string(),
token_ids: vec![],
logprobs: None,
finished: Some(vllm_text::Finished {
prompt_token_count: 3,
output_token_count: 0,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
}),
}),
]);
let collected = reasoning_event_stream(events, Some(test_reasoning_parser(&mut factory)))
.collect::<Vec<_>>()
.await;
let events = collected
.into_iter()
.collect::<crate::Result<Vec<_>>>()
.expect("reasoning stream should not fail");
assert_eq!(
events,
vec![
ContentEvent::Start {
prompt_token_ids: vec![1, 2, 3].into(),
prompt_logprobs: None,
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "abc".to_string(),
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "def".to_string(),
},
ContentEvent::Done {
prompt_token_count: 3,
output_token_count: 0,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
},
]
);
}
// Logprobs attached to a text delta must be forwarded as a separate
// `LogprobsDelta` event after the text. Note this runs without a parser, so
// it exercises the pass-through path.
#[tokio::test]
async fn reasoning_stream_preserves_logprobs_delta() {
let events = stream::iter(vec![
Ok(DecodedTextEvent::Start {
prompt_token_ids: vec![1].into(),
prompt_logprobs: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "abc".to_string(),
token_ids: vec![],
logprobs: Some(DecodedLogprobs {
positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "a".to_string(),
logprob: -0.1,
rank: 1,
}],
}],
}),
finished: None,
}),
]);
let collected = reasoning_event_stream(events, None)
.collect::<Vec<_>>()
.await
.into_iter()
.collect::<crate::Result<Vec<_>>>()
.unwrap();
assert_eq!(
collected,
vec![
ContentEvent::Start {
prompt_token_ids: vec![1].into(),
prompt_logprobs: None,
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "abc".to_string(),
},
ContentEvent::LogprobsDelta {
logprobs: Some(DecodedLogprobs {
positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "a".to_string(),
logprob: -0.1,
rank: 1,
}],
}],
}),
token_ids: vec![],
},
]
);
}
// The prompt is the single token 2 (`</think>` in `FakeTokenizer`), so the
// Qwen3 parser is expected to treat all output as content, including a
// literal `</think>` in the text.
#[tokio::test]
async fn qwen3_parser_uses_prompt_end_marker_to_switch_to_content() {
let tokenizer = Arc::new(FakeTokenizer);
let events = stream::iter(vec![
Ok(DecodedTextEvent::Start {
prompt_token_ids: vec![2].into(),
prompt_logprobs: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "thought ".to_string(),
token_ids: vec![],
logprobs: None,
finished: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "done</think>OK".to_string(),
token_ids: vec![],
logprobs: None,
finished: None,
}),
]);
let factory = ReasoningParserFactory::new();
let collected = reasoning_event_stream(
events,
Some(factory.create(names::QWEN3, tokenizer).unwrap()),
)
.collect::<Vec<_>>()
.await;
let events = collected
.into_iter()
.collect::<crate::Result<Vec<_>>>()
.expect("reasoning stream should not fail");
assert_eq!(
events,
vec![
ContentEvent::Start {
prompt_token_ids: vec![2].into(),
prompt_logprobs: None,
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "thought ".to_string(),
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "done</think>OK".to_string(),
},
]
);
}
// The prompt is the single token 1 (`<think>` in `FakeTokenizer`), so
// output is expected to start as reasoning and switch to content after the
// `</think>` marker, which itself is not emitted.
#[tokio::test]
async fn qwen3_parser_tolerates_prompt_prefill_reasoning() {
let tokenizer = Arc::new(FakeTokenizer);
let events = stream::iter(vec![
Ok(DecodedTextEvent::Start {
prompt_token_ids: vec![1].into(),
prompt_logprobs: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "thought ".to_string(),
token_ids: vec![],
logprobs: None,
finished: None,
}),
Ok(DecodedTextEvent::TextDelta {
delta: "done</think>OK".to_string(),
token_ids: vec![],
logprobs: None,
finished: None,
}),
]);
let factory = ReasoningParserFactory::new();
let collected = reasoning_event_stream(
events,
Some(factory.create(names::QWEN3, tokenizer).unwrap()),
)
.collect::<Vec<_>>()
.await;
let events = collected
.into_iter()
.collect::<crate::Result<Vec<_>>>()
.expect("reasoning stream should not fail");
assert_eq!(
events,
vec![
ContentEvent::Start {
prompt_token_ids: vec![1].into(),
prompt_logprobs: None,
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Reasoning,
delta: "thought ".to_string(),
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Reasoning,
delta: "done".to_string(),
},
ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "OK".to_string(),
},
]
);
}
}
+625
View File
@@ -0,0 +1,625 @@
//! Adapts plain assistant text deltas into tool-call-aware assistant updates.
//!
//! This stage runs after reasoning separation and before final block assembly.
//! It only inspects normal assistant text, leaves reasoning deltas untouched,
//! and translates incremental tool parsing output into internal tool-call
//! events while preserving plain-text fallback behavior.
use asynk_strim_attr::{TryYielder, try_stream};
use futures::{StreamExt as _, pin_mut};
use thiserror_ext::AsReport;
use tracing::warn;
use super::{AssistantEvent, ContentEvent, ContentEventStream};
use crate::Result;
use crate::error::Error;
use crate::event::AssistantBlockKind;
use crate::output::generate_tool_call_id;
use crate::parser::tool::{ToolCallDelta, ToolParseResult, ToolParser};
/// Per-stream tool parsing state.
///
/// One instance is created per streamed response when a tool parser is
/// configured.
struct ToolState {
/// Parser for the current model family.
parser: Box<dyn ToolParser>,
/// Whether tool parsing has already failed for this stream.
///
/// Once set, text deltas bypass the parser and are forwarded as plain text.
parser_failed: bool,
/// The parser-local index of the currently open tool call, if any.
// NOTE: We only allow single open tool call at a time right now, since that's what all
// supported parsers currently emit. Change this to a `BTreeMap` if we need to support multiple
// interleaved calls in the future.
open_call_index: Option<usize>,
}
impl ToolState {
/// Create one fresh tool-parsing state for a new streamed response.
fn new(parser: Box<dyn ToolParser>) -> Self {
Self {
parser,
parser_failed: false,
open_call_index: None,
}
}
/// Convert one semantic assistant text delta into zero or more tool-aware
/// internal events.
fn process_text_delta(
&mut self,
kind: AssistantBlockKind,
delta: String,
) -> Result<Vec<AssistantEvent>> {
let mut events = Vec::new();
// Only normal assistant text is eligible for tool parsing. Reasoning
// blocks and plain-text fallback should pass through unchanged.
if kind != AssistantBlockKind::Text || self.parser_failed {
self.open_call_index = None;
events.push(AssistantEvent::TextDelta { kind, delta });
return Ok(events);
}
let parse_result = self.parser.push(&delta);
match parse_result {
Ok(result) => self.process_parse_result(kind, result, &mut events)?,
Err(error) => {
if !self.parser_failed {
warn!(
error = %error.as_report(),
"tool parser failed; falling back to plain text deltas"
);
self.parser_failed = true;
}
self.open_call_index = None;
events.push(AssistantEvent::TextDelta { kind, delta });
}
}
Ok(events)
}
/// Apply one parsed tool result to the current stream state.
fn process_parse_result(
&mut self,
kind: AssistantBlockKind,
result: ToolParseResult,
events: &mut Vec<AssistantEvent>,
) -> Result<()> {
// When we are not currently streaming a tool call, preserve plain
// text first and then surface any new tool call items.
if self.open_call_index.is_none() {
push_text_delta(events, kind, result.normal_text);
self.process_tool_items(result.calls, events)?;
} else {
// Once a tool call is open, prioritize tool deltas first. If the
// parser emits normal text again, close the tool call and resume
// plain text output.
self.process_tool_items(result.calls, events)?;
if !result.normal_text.is_empty() {
self.open_call_index = None;
push_text_delta(events, kind, result.normal_text);
}
}
Ok(())
}
/// Apply one batch of parsed tool-call deltas emitted by the parser.
fn process_tool_items(
&mut self,
items: Vec<ToolCallDelta>,
events: &mut Vec<AssistantEvent>,
) -> Result<()> {
for item in items {
if let Some(name) = item.name {
let is_new_tool = match self.open_call_index {
Some(open_call_index) => open_call_index != item.tool_index,
None => true,
};
if is_new_tool {
let id = generate_tool_call_id();
self.open_call_index = Some(item.tool_index);
events.push(AssistantEvent::ToolCallStart { id, name });
}
}
if item.arguments.is_empty() {
// No arguments delta to apply.
continue;
}
let Some(open_call_index) = self.open_call_index else {
return Err(Error::ToolCallStreamInvariant {
message: format!(
"received arguments for tool index {} before any tool-call start",
item.tool_index
),
});
};
if open_call_index != item.tool_index {
return Err(Error::ToolCallStreamInvariant {
message: format!(
"received arguments for tool index {} while tool index {} is open",
item.tool_index, open_call_index
),
});
}
events.push(AssistantEvent::ToolCallArgumentsDelta {
delta: item.arguments,
});
}
Ok(())
}
/// Flush parser state at end-of-stream and close any remaining open calls.
fn finish(&mut self) -> Result<Vec<AssistantEvent>> {
let mut events = Vec::new();
if self.parser_failed {
return Ok(events);
}
match self.parser.finish() {
Ok(result) => {
self.process_parse_result(AssistantBlockKind::Text, result, &mut events)?
}
Err(error) => {
warn!(
error = %error.as_report(),
"tool parser finish failed; closing open tool calls with buffered state"
);
self.parser_failed = true;
}
}
Ok(events)
}
}
/// Append a text delta of the given kind to `events`, skipping empty deltas.
fn push_text_delta(events: &mut Vec<AssistantEvent>, kind: AssistantBlockKind, delta: String) {
    if !delta.is_empty() {
        events.push(AssistantEvent::TextDelta { kind, delta });
    }
}
/// Adapt a content event stream into the tool-aware assistant event stream.
///
/// Without a parser every event is converted with `into()` and forwarded.
/// With a parser, text deltas are run through per-stream tool parsing, the
/// parser is flushed right before `Done`, and all other events are forwarded
/// field by field.
///
/// Errors from the input stream and tool-call invariant violations end this
/// stream with that error.
#[try_stream]
pub(crate) async fn tool_event_stream(
    stream: impl ContentEventStream,
    parser: Option<Box<dyn ToolParser>>,
    mut y: TryYielder<AssistantEvent, Error>,
) -> Result<()> {
    pin_mut!(stream);

    let mut state = match parser {
        Some(parser) => ToolState::new(parser),
        None => {
            // No parser configured: plain pass-through.
            while let Some(event) = stream.next().await.transpose()? {
                y.yield_ok(event.into()).await;
            }
            return Ok(());
        }
    };

    loop {
        let Some(event) = stream.next().await.transpose()? else {
            break;
        };
        // Every arm except `TextDelta` ends in exactly one forwarded event.
        let forwarded = match event {
            ContentEvent::TextDelta { kind, delta } => {
                for next in state.process_text_delta(kind, delta)? {
                    y.yield_ok(next).await;
                }
                continue;
            }
            ContentEvent::Start {
                prompt_token_ids,
                prompt_logprobs,
            } => AssistantEvent::Start {
                prompt_token_ids,
                prompt_logprobs,
            },
            ContentEvent::LogprobsDelta {
                logprobs,
                token_ids,
            } => AssistantEvent::LogprobsDelta {
                logprobs,
                token_ids,
            },
            ContentEvent::Done {
                prompt_token_count,
                output_token_count,
                finish_reason,
                kv_transfer_params,
            } => {
                // Flushed parser output must precede the terminal event.
                for next in state.finish()? {
                    y.yield_ok(next).await;
                }
                AssistantEvent::Done {
                    prompt_token_count,
                    output_token_count,
                    finish_reason,
                    kv_transfer_params,
                }
            }
        };
        y.yield_ok(forwarded).await;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
use futures::{StreamExt as _, stream};
use vllm_llm::FinishReason;
use vllm_text::{DecodedLogprobs, DecodedPositionLogprobs, DecodedTokenLogprob};
use vllm_tool_parser::Result;
use super::super::{AssistantEvent, ContentEvent};
use super::tool_event_stream;
use crate::error::Error;
use crate::event::{AssistantBlockKind, AssistantMessageExt as _};
use crate::output::structured::structured_chat_event_stream;
use crate::parser::tool::{ToolParseResult, ToolParser, ToolParserError};
use crate::request::ChatTool;
use crate::stream::ChatEventStream;
struct FailingParser {
fail_next: bool,
}
struct ScriptedParser {
push_results: Vec<ToolParseResult>,
finish_result: ToolParseResult,
}
impl ToolParser for FailingParser {
fn create(_tools: &[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>
where
Self: Sized + 'static,
{
Ok(Box::new(Self { fail_next: false }))
}
fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
if self.fail_next {
self.fail_next = false;
return Err(ToolParserError::ParsingFailed {
message: "boom".to_string(),
});
}
Ok(ToolParseResult::default())
}
}
impl ToolParser for ScriptedParser {
fn create(_tools: &[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>
where
Self: Sized + 'static,
{
Ok(Box::new(Self {
push_results: Vec::new(),
finish_result: ToolParseResult::default(),
}))
}
fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
Ok(self.push_results.pop().unwrap_or_default())
}
fn finish(&mut self) -> Result<ToolParseResult> {
Ok(std::mem::take(&mut self.finish_result))
}
}
#[tokio::test]
async fn tool_parser_failure_falls_back_to_plain_text() {
let events = stream::iter(vec![
Ok(ContentEvent::Start {
prompt_token_ids: vec![1, 2, 3].into(),
prompt_logprobs: None,
}),
Ok(ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "abc".to_string(),
}),
Ok(ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "def".to_string(),
}),
Ok(ContentEvent::Done {
prompt_token_count: 3,
output_token_count: 0,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
}),
]);
let collected =
tool_event_stream(events, Some(Box::new(FailingParser { fail_next: true })))
.collect::<Vec<_>>()
.await;
let events = collected
.into_iter()
.collect::<crate::Result<Vec<_>>>()
.expect("tool stream should not fail");
assert_eq!(
events,
vec![
AssistantEvent::Start {
prompt_token_ids: vec![1, 2, 3].into(),
prompt_logprobs: None,
},
AssistantEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "abc".to_string(),
},
AssistantEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "def".to_string(),
},
AssistantEvent::Done {
prompt_token_count: 3,
output_token_count: 0,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
},
]
);
let message = ChatEventStream::new(
"req_fallback".to_string(),
Box::pin(structured_chat_event_stream(stream::iter(
events.into_iter().map(Ok),
))),
)
.collect_message()
.await
.expect("collect_message should succeed");
assert_eq!(message.message.text(), "abcdef");
assert!(message.message.tool_calls().next().is_none());
}
/// A `LogprobsDelta` content event must come out of `tool_event_stream` as the
/// equivalent `AssistantEvent::LogprobsDelta`, between `Start` and `Done`.
#[tokio::test]
async fn tool_stream_preserves_logprobs_delta() {
    // Same single-position payload is used for the input and the expectation.
    let sample_logprobs = || DecodedLogprobs {
        positions: vec![DecodedPositionLogprobs {
            entries: vec![DecodedTokenLogprob {
                token_id: 0,
                token: "a".to_string(),
                logprob: -0.2,
                rank: 1,
            }],
        }],
    };

    let input = stream::iter(vec![
        Ok(ContentEvent::Start {
            prompt_token_ids: vec![1].into(),
            prompt_logprobs: None,
        }),
        Ok(ContentEvent::LogprobsDelta {
            logprobs: Some(sample_logprobs()),
            token_ids: vec![],
        }),
        Ok(ContentEvent::Done {
            prompt_token_count: 1,
            output_token_count: 0,
            finish_reason: FinishReason::stop_eos(),
            kv_transfer_params: None,
        }),
    ]);

    let parser = FailingParser { fail_next: false };
    let raw_results = tool_event_stream(input, Some(Box::new(parser)))
        .collect::<Vec<_>>()
        .await;
    let actual = raw_results
        .into_iter()
        .collect::<crate::Result<Vec<_>>>()
        .unwrap();

    let expected = vec![
        AssistantEvent::Start {
            prompt_token_ids: vec![1].into(),
            prompt_logprobs: None,
        },
        AssistantEvent::LogprobsDelta {
            logprobs: Some(sample_logprobs()),
            token_ids: vec![],
        },
        AssistantEvent::Done {
            prompt_token_count: 1,
            output_token_count: 0,
            finish_reason: FinishReason::stop_eos(),
            kv_transfer_params: None,
        },
    ];
    assert_eq!(actual, expected);
}
#[tokio::test]
async fn tool_stream_rejects_interleaved_tool_indices() {
let events = stream::iter(vec![
Ok(ContentEvent::TextDelta {
kind: AssistantBlockKind::Text,
delta: "ignored".to_string(),
}),
Ok(ContentEvent::Done {
prompt_token_count: 1,
output_token_count: 1,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
}),
]);
let parser = ScriptedParser {
push_results: vec![ToolParseResult {
normal_text: String::new(),
calls: vec![
crate::parser::tool::ToolCallDelta {
tool_index: 0,
name: Some("first".to_string()),
arguments: String::new(),
},
crate::parser::tool::ToolCallDelta {
tool_index: 1,
name: None,
arguments: "{}".to_string(),
},
],
}],
finish_result: ToolParseResult::default(),
};
let err = tool_event_stream(events, Some(Box::new(parser)))
.collect::<Vec<_>>()
.await
.into_iter()
.find_map(|result| result.err())
.expect("expected invariant error");
assert!(matches!(err, Error::ToolCallStreamInvariant { .. }));
}
/// The scripted parser's very first result carries arguments for tool index 0
/// without a name; the stream must fail with the "before any tool-call start"
/// invariant message.
#[tokio::test]
async fn tool_stream_resets_open_tool_when_normal_text_interrupts_it() {
    let text_delta = |text: &str| {
        Ok(ContentEvent::TextDelta {
            kind: AssistantBlockKind::Text,
            delta: text.to_string(),
        })
    };
    let input = stream::iter(vec![
        text_delta("start"),
        text_delta("text"),
        text_delta("args"),
    ]);

    // One scripted result per pushed text delta, in order.
    let nameless_arguments = ToolParseResult {
        normal_text: String::new(),
        calls: vec![crate::parser::tool::ToolCallDelta {
            tool_index: 0,
            name: None,
            arguments: "}".to_string(),
        }],
    };
    let plain_text_only = ToolParseResult {
        normal_text: "plain text".to_string(),
        calls: Vec::new(),
    };
    let named_call = ToolParseResult {
        normal_text: String::new(),
        calls: vec![crate::parser::tool::ToolCallDelta {
            tool_index: 0,
            name: Some("first".to_string()),
            arguments: "{".to_string(),
        }],
    };
    let parser = ScriptedParser {
        push_results: vec![nameless_arguments, plain_text_only, named_call],
        finish_result: ToolParseResult::default(),
    };

    let results = tool_event_stream(input, Some(Box::new(parser)))
        .collect::<Vec<_>>()
        .await;
    let first_error = results
        .into_iter()
        .filter_map(|item| item.err())
        .next()
        .expect("expected invariant error");

    let is_expected_error = matches!(
        first_error,
        Error::ToolCallStreamInvariant { message }
            if message == "received arguments for tool index 0 before any tool-call start"
    );
    assert!(is_expected_error);
}
/// Two fully-formed tool calls returned by a single parser push must yield a
/// start + arguments pair per call, and the assembled message must expose both
/// calls in order.
#[tokio::test]
async fn tool_stream_emits_start_and_args_for_terminal_text() {
    let scripted_call = |tool_index: usize, name: &str, arguments: &str| {
        crate::parser::tool::ToolCallDelta {
            tool_index,
            name: Some(name.to_string()),
            arguments: arguments.to_string(),
        }
    };
    let parser = ScriptedParser {
        push_results: vec![ToolParseResult {
            normal_text: String::new(),
            calls: vec![
                scripted_call(0, "first", r#"{"a":1}"#),
                scripted_call(1, "second", r#"{"b":2}"#),
            ],
        }],
        finish_result: ToolParseResult::default(),
    };

    let input = stream::iter(vec![
        Ok(ContentEvent::Start {
            prompt_token_ids: vec![1].into(),
            prompt_logprobs: None,
        }),
        Ok(ContentEvent::TextDelta {
            kind: AssistantBlockKind::Text,
            delta: "ignored".to_string(),
        }),
        Ok(ContentEvent::Done {
            prompt_token_count: 1,
            output_token_count: 1,
            finish_reason: FinishReason::stop_eos(),
            kv_transfer_params: None,
        }),
    ]);

    let assistant_events = tool_event_stream(input, Some(Box::new(parser)))
        .collect::<Vec<_>>()
        .await
        .into_iter()
        .collect::<crate::Result<Vec<_>>>()
        .unwrap();

    // Index 0 is the `Start` event; each call then contributes two events.
    for (start_at, args_at) in [(1, 2), (3, 4)] {
        assert!(matches!(
            assistant_events[start_at],
            AssistantEvent::ToolCallStart { .. }
        ));
        assert!(matches!(
            assistant_events[args_at],
            AssistantEvent::ToolCallArgumentsDelta { .. }
        ));
    }

    let structured = structured_chat_event_stream(stream::iter(
        assistant_events.into_iter().map(Ok),
    ));
    let collected = ChatEventStream::new("req_final_only".to_string(), Box::pin(structured))
        .collect_message()
        .await
        .unwrap();

    let tool_calls = collected.message.tool_calls().collect::<Vec<_>>();
    assert_eq!(tool_calls.len(), 2);
    assert_eq!(tool_calls[0].name, "first");
    assert_eq!(tool_calls[1].name, "second");
}
}
+430
View File
@@ -0,0 +1,430 @@
//! Native Harmony output processing for `gpt_oss`.
//!
//! Unlike the default text-first pipeline, this processor consumes
//! `DecodedTextEvent` token IDs directly and lets the official `openai-harmony`
//! parser recover the structured assistant message shape at token granularity.
use std::sync::LazyLock;
use anyhow::Context;
use asynk_strim_attr::{TryYielder, try_stream};
use futures::StreamExt as _;
use openai_harmony::chat::{Content as HarmonyContent, Message as HarmonyMessage, Role};
use openai_harmony::{
HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
};
use thiserror_ext::AsReport;
use vllm_text::output::DecodedTextEvent;
use crate::Result as ChatResult;
use crate::error::{Error, Result};
use crate::event::AssistantBlockKind;
use crate::output::{
AssistantEvent, ChatOutputProcessor, DynChatEventStream, DynDecodedTextEventStream,
generate_tool_call_id,
};
use crate::parser::ParserSelection;
use crate::request::ChatRequest;
/// Request-scoped Harmony output processor used for `model_type == "gpt_oss"`.
///
/// This processor keeps the existing northbound `ChatEvent` shape, but swaps
/// the parsed-assistant backend from generic text/reasoning/tool parsers to the
/// official Harmony token parser.
#[derive(Debug)]
pub struct HarmonyChatOutputProcessor {
/// Process-wide Harmony encoding obtained from `harmony_encoding()`.
encoding: &'static HarmonyEncoding,
/// Whether tool-call content is surfaced as structured tool events; set
/// from `ChatRequest::tool_parsing_enabled()` in `new`.
tool_calls_enabled: bool,
}
/// Identity of one run of Harmony content: which parser message it belongs to
/// and that message's channel/recipient. Two deltas are coalesced only when
/// their keys compare equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct HarmonyGroupKey {
/// Index of the owning message in the parser's message list.
serial: usize,
/// Harmony channel of the message, if the parser reports one.
channel: Option<String>,
/// Harmony recipient of the message, if the parser reports one.
recipient: Option<String>,
}
/// Coalesced content deltas that share one `HarmonyGroupKey`, waiting to be
/// turned into assistant events by `HarmonyState::emit_group`.
#[derive(Debug)]
struct HarmonyGroup {
/// Message/channel/recipient identity shared by all text in this group.
key: HarmonyGroupKey,
/// Concatenated content deltas for this group.
text: String,
}
/// Marker for the tool call that has been started but not yet completed.
#[derive(Debug)]
struct OpenHarmonyToolCall {
/// Full Harmony recipient string of the open call; compared against the
/// recipient of later groups to decide whether a new call must be started.
recipient: String,
}
/// Mutable per-response state that drives the Harmony parser and tracks what
/// has already been emitted as assistant events.
struct HarmonyState {
/// Incremental Harmony parser over assistant token IDs.
parser: StreamableParser,
/// Whether tool-call content should surface as structured tool events.
tool_calls_enabled: bool,
/// Count of completed visible assistant messages for newline insertion.
completed_visible_messages: usize,
/// Count of completed reasoning messages for newline insertion.
completed_reasoning_messages: usize,
/// The current visible text/reasoning group, if any.
current_text_group: Option<HarmonyGroupKey>,
/// The currently open Harmony tool recipient, if any.
open_tool_call: Option<OpenHarmonyToolCall>,
}
impl HarmonyChatOutputProcessor {
    /// Build a processor for one request.
    ///
    /// Fails if the shared Harmony encoding cannot be loaded. Tool-call
    /// surfacing follows `request.tool_parsing_enabled()`.
    pub fn new(request: &ChatRequest) -> ChatResult<Self> {
        let encoding = harmony_encoding()?;
        let tool_calls_enabled = request.tool_parsing_enabled();
        Ok(Self {
            encoding,
            tool_calls_enabled,
        })
    }
}
/// Check that neither generic parser selection overrides native Harmony
/// parsing.
///
/// `gpt_oss` uses a model-specific token-level parser, so any selection other
/// than `Auto` is reported as an error rather than silently ignored. The tool
/// selection is checked first, then the reasoning selection.
pub(crate) fn validate_harmony_parser_overrides(
    tool_call_parser: &ParserSelection,
    reasoning_parser: &ParserSelection,
) -> ChatResult<()> {
    let checks = [("tool", tool_call_parser), ("reasoning", reasoning_parser)];
    for (kind, selection) in checks {
        validate_harmony_override(kind, selection)?;
    }
    Ok(())
}
fn validate_harmony_override(kind: &'static str, selection: &ParserSelection) -> ChatResult<()> {
if matches!(selection, ParserSelection::Auto) {
return Ok(());
}
Err(Error::HarmonyParserOverrideUnsupported {
kind,
selection: selection.to_string(),
})
}
impl ChatOutputProcessor for HarmonyChatOutputProcessor {
    /// Run the decoded stream through the Harmony assistant-event adapter and
    /// then through the structured chat-event assembler, returning the boxed
    /// result.
    fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream> {
        use crate::output::structured::structured_chat_event_stream;

        let assistant_events =
            harmony_assistant_event_stream(decoded, self.encoding, self.tool_calls_enabled);
        let chat_events = structured_chat_event_stream(assistant_events);
        Ok(chat_events.boxed())
    }
}
impl HarmonyState {
/// Create one fresh Harmony streaming state for a new assistant response.
///
/// Fails with `Error::HarmonyOutputParsing` if the parser cannot be built.
fn new(encoding: HarmonyEncoding, tool_calls_enabled: bool) -> Result<Self> {
Ok(Self {
parser: StreamableParser::new(encoding, Some(Role::Assistant))
.map_err(harmony_output_parsing_error)?,
tool_calls_enabled,
completed_visible_messages: 0,
completed_reasoning_messages: 0,
current_text_group: None,
open_tool_call: None,
})
}
/// Feed one batch of token IDs to the parser and return the assistant
/// events they produce.
///
/// Consecutive non-empty content deltas that share the same
/// `HarmonyGroupKey` are merged into one group before being emitted, so a
/// batch yields at most one event run per (message, channel, recipient).
fn process_token_ids(&mut self, token_ids: &[u32]) -> Result<Vec<AssistantEvent>> {
let mut events = Vec::new();
// Group being accumulated within this call; never carried across calls.
let mut pending_group: Option<HarmonyGroup> = None;
for &token_id in token_ids {
// Compare the parser's message count around the token to detect
// messages completed by it.
let completed_before = self.parser.messages().len();
self.parser.process(token_id).map_err(harmony_output_parsing_error)?;
let completed_after = self.parser.messages().len();
if let Some(delta) = self
.parser
.last_content_delta()
.map_err(harmony_output_parsing_error)?
.filter(|delta| !delta.is_empty())
{
let key = HarmonyGroupKey {
serial: completed_after,
channel: self.parser.current_channel(),
recipient: self.parser.current_recipient(),
};
match pending_group.as_mut() {
Some(group) if group.key == key => group.text.push_str(&delta),
_ => {
// Key changed (or no group yet): flush the old group first.
if let Some(group) = pending_group.take() {
self.emit_group(group, &mut events);
}
pending_group = Some(HarmonyGroup { key, text: delta });
}
}
}
if completed_after > completed_before {
// Flush buffered content before the completion bookkeeping below
// updates the newline counters and open-tool state.
if let Some(group) = pending_group.take() {
self.emit_group(group, &mut events);
}
for serial in completed_before..completed_after {
let key = {
let message = &self.parser.messages()[serial];
HarmonyGroupKey {
serial,
channel: message.channel.clone(),
recipient: message.recipient.clone(),
}
};
self.handle_completed_message(key);
}
}
}
if let Some(group) = pending_group {
self.emit_group(group, &mut events);
}
Ok(events)
}
/// Flush Harmony parser state at EOS and emit any newly finalized assistant
/// events.
///
/// Returns no events when EOS does not complete a message.
fn process_eos(&mut self) -> Result<Vec<AssistantEvent>> {
// Capture the in-progress message's identity and content before EOS.
let completed_before = self.parser.messages().len();
let pending_key = HarmonyGroupKey {
serial: completed_before,
channel: self.parser.current_channel(),
recipient: self.parser.current_recipient(),
};
let pending_content =
self.parser.current_content().map_err(harmony_output_parsing_error)?;
self.parser.process_eos().map_err(harmony_output_parsing_error)?;
let completed_after = self.parser.messages().len();
let mut events = Vec::new();
if completed_after == completed_before {
return Ok(events);
}
let final_message = &self.parser.messages()[completed_before];
let final_text = harmony_message_text(final_message);
// Emit only the part of the finalized text beyond `pending_content`; if
// it is not a prefix of the finalized text, the whole text is emitted.
let tail = final_text.strip_prefix(&pending_content).unwrap_or(final_text).to_string();
if !tail.is_empty() {
self.emit_group(
HarmonyGroup {
key: pending_key,
text: tail,
},
&mut events,
);
}
for serial in completed_before..completed_after {
let key = {
let message = &self.parser.messages()[serial];
HarmonyGroupKey {
serial,
channel: message.channel.clone(),
recipient: message.recipient.clone(),
}
};
self.handle_completed_message(key);
}
Ok(events)
}
/// Flush one coalesced Harmony content group into internal assistant
/// events.
///
/// Text/reasoning groups become `TextDelta` events, preceded by a "\n"
/// delta when an earlier message of the same kind has completed. Groups
/// addressed to a `functions.*` recipient become tool-call events if tool
/// calls are enabled. Any other group is dropped.
fn emit_group(&mut self, group: HarmonyGroup, events: &mut Vec<AssistantEvent>) {
let channel = group.key.channel.as_deref();
let recipient = group.key.recipient.as_deref();
if let Some(kind) = text_block_kind(channel, recipient) {
// Visible text ends any tool call that was still open.
self.open_tool_call = None;
if self.current_text_group.as_ref() != Some(&group.key) {
// First delta of a new text group: separate it from earlier
// completed messages of the same kind.
let needs_newline = match kind {
AssistantBlockKind::Text => self.completed_visible_messages > 0,
AssistantBlockKind::Reasoning => self.completed_reasoning_messages > 0,
AssistantBlockKind::ToolCall => false,
};
if needs_newline {
events.push(AssistantEvent::TextDelta {
kind,
delta: "\n".to_string(),
});
}
self.current_text_group = Some(group.key.clone());
}
events.push(AssistantEvent::TextDelta {
kind,
delta: group.text,
});
return;
}
self.current_text_group = None;
let Some(tool_name) = tool_name(channel, recipient) else {
return;
};
if !self.tool_calls_enabled {
return;
}
// `tool_name` only returns `Some` when `recipient` is `Some`.
let recipient = recipient.expect("tool groups always have recipient").to_string();
let opens_same_call = match self.open_tool_call.as_ref() {
Some(open_call) => open_call.recipient == recipient,
None => false,
};
if !opens_same_call {
let id = generate_tool_call_id();
self.open_tool_call = Some(OpenHarmonyToolCall { recipient });
events.push(AssistantEvent::ToolCallStart {
id,
name: tool_name.to_string(),
});
}
if !group.text.is_empty() {
events.push(AssistantEvent::ToolCallArgumentsDelta { delta: group.text });
}
}
/// Update newline and open-tool state after one Harmony message completes.
fn handle_completed_message(&mut self, key: HarmonyGroupKey) {
if self.current_text_group.as_ref() == Some(&key) {
self.current_text_group = None;
}
let channel = key.channel.as_deref();
let recipient = key.recipient.as_deref();
let kind = text_block_kind(channel, recipient);
if kind == Some(AssistantBlockKind::Text) {
self.completed_visible_messages += 1;
} else if kind == Some(AssistantBlockKind::Reasoning) {
self.completed_reasoning_messages += 1;
} else if tool_name(channel, recipient).is_some() {
// A finished tool message closes the call, so the next tool group
// starts a new call even if it targets the same recipient.
self.open_tool_call = None;
}
}
}
/// Convert decoded token updates into internal assistant events with Harmony
/// parsing.
///
/// For each `TextDelta` update the yield order is: events parsed from the
/// token IDs, then (if the update is final) events from the EOS flush, then a
/// `LogprobsDelta` if there are logprobs or token IDs, then `Done` if the
/// update is final. Errors from the input stream or the parser end the stream.
#[try_stream]
async fn harmony_assistant_event_stream(
decoded: DynDecodedTextEventStream,
encoding: &'static HarmonyEncoding,
tool_calls_enabled: bool,
mut y: TryYielder<AssistantEvent, Error>,
) -> Result<()> {
// The parser state takes an owned encoding, so clone the shared one.
let mut state = HarmonyState::new(encoding.clone(), tool_calls_enabled)?;
futures::pin_mut!(decoded);
while let Some(event) = decoded.next().await.transpose()? {
match event {
DecodedTextEvent::Start {
prompt_token_ids,
prompt_logprobs,
} => {
y.yield_ok(AssistantEvent::Start {
prompt_token_ids,
prompt_logprobs,
})
.await;
}
DecodedTextEvent::TextDelta {
delta: _, // harmony takes raw token IDs as input, so we ignore text deltas here
token_ids,
logprobs,
finished,
} => {
for event in state.process_token_ids(&token_ids)? {
y.yield_ok(event).await;
}
// Flush the parser before reporting sample metadata and `Done`.
if finished.is_some() {
for event in state.process_eos()? {
y.yield_ok(event).await;
}
}
if logprobs.is_some() || !token_ids.is_empty() {
y.yield_ok(AssistantEvent::LogprobsDelta {
logprobs,
token_ids,
})
.await;
}
if let Some(finished) = finished {
y.yield_ok(AssistantEvent::Done {
prompt_token_count: finished.prompt_token_count,
output_token_count: finished.output_token_count,
finish_reason: finished.finish_reason,
kv_transfer_params: finished.kv_transfer_params,
})
.await;
}
}
}
}
Ok(())
}
/// Lazily load the shared GPT-OSS Harmony encoding once per process.
fn harmony_encoding() -> Result<&'static HarmonyEncoding> {
static ENCODING: LazyLock<anyhow::Result<HarmonyEncoding>> = LazyLock::new(|| {
load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss)
.context("failed to load harmony encoding for gpt-oss")
});
ENCODING.as_ref().map_err(|error| Error::HarmonyOutputParsing {
error: error.to_report_string().into(),
})
}
fn harmony_output_parsing_error(
error: impl Into<Box<dyn std::error::Error + Send + Sync>>,
) -> Error {
Error::HarmonyOutputParsing {
error: error.into(),
}
}
/// Borrow the text of a parsed Harmony message.
///
/// Panics via `unreachable!` if the message does not consist of exactly one
/// text content block.
fn harmony_message_text(message: &HarmonyMessage) -> &str {
    match message.content.as_slice() {
        [HarmonyContent::Text(text)] => &text.text,
        _ => unreachable!("Harmony parser emits one text content block per parsed message"),
    }
}
/// Classify a Harmony `(channel, recipient)` pair as a visible assistant block
/// kind, or `None` when the pair is not plain text or reasoning.
///
/// - `final` with any recipient, or `commentary` without one: `Text`
/// - `analysis` without a recipient: `Reasoning`
fn text_block_kind(channel: Option<&str>, recipient: Option<&str>) -> Option<AssistantBlockKind> {
    let has_recipient = recipient.is_some();
    match (channel?, has_recipient) {
        ("final", _) | ("commentary", false) => Some(AssistantBlockKind::Text),
        ("analysis", false) => Some(AssistantBlockKind::Reasoning),
        _ => None,
    }
}
/// Return the function name addressed by a Harmony recipient.
///
/// Yields `Some` only when the channel is `commentary` or `analysis` and the
/// recipient starts with `functions.`; the prefix is stripped from the result.
fn tool_name<'a>(channel: Option<&str>, recipient: Option<&'a str>) -> Option<&'a str> {
    let on_tool_channel = matches!(channel, Some("commentary" | "analysis"));
    if !on_tool_channel {
        return None;
    }
    recipient?.strip_prefix("functions.")
}
#[cfg(test)]
mod tests;
+351
View File
@@ -0,0 +1,351 @@
//! Harmony output tests share the upstream `openai-harmony` tiktoken cache.
//!
//! Use a file lock for tests that load the encoding so `cargo nextest` cannot
//! start multiple processes that concurrently populate the same cache file.
use std::sync::Arc;
use futures::executor::block_on;
use futures::{TryStreamExt as _, stream};
use openai_harmony::chat::{Message, Role};
use serial_test::file_serial;
use vllm_text::output::{DecodedLogprobs, DecodedPositionLogprobs, DecodedTextEvent, Finished};
use super::*;
use crate::output::ChatOutputProcessor;
use crate::request::{ChatRequest, ChatTool, ChatToolChoice};
use crate::{AssistantMessageExt, ChatEvent, FinishReason};
/// Tokens rendered for an empty conversation that is about to be completed by
/// the assistant.
fn assistant_prefix() -> Vec<u32> {
    let encoding = harmony_encoding().unwrap();
    let no_messages = std::iter::empty::<&Message>();
    encoding
        .render_conversation_for_completion(no_messages, Role::Assistant, None)
        .unwrap()
}
/// Render `messages` and return the tokens that follow `assistant_prefix()`.
///
/// Panics if the rendering does not start with that prefix.
fn completion_tokens(messages: &[Message]) -> Vec<u32> {
    let encoding = harmony_encoding().unwrap();
    let prefix = assistant_prefix();
    let rendered = encoding.render_conversation(messages.iter(), None).unwrap();
    assert!(rendered.starts_with(&prefix));
    let completion = &rendered[prefix.len()..];
    completion.to_vec()
}
/// Assistant message with `text` as content on the given Harmony channel.
fn text_message(channel: &str, text: &str) -> Message {
    let message = Message::from_role_and_content(Role::Assistant, text);
    message.with_channel(channel)
}
/// Assistant message addressed to `functions.{name}` on `channel`, carrying
/// `arguments` as its content with content type `json`.
fn tool_message(name: &str, arguments: &str, channel: &str) -> Message {
    let recipient = format!("functions.{name}");
    let message = Message::from_role_and_content(Role::Assistant, arguments);
    message
        .with_channel(channel)
        .with_recipient(recipient)
        .with_content_type("json")
}
/// `Start` event with no prompt tokens and no prompt logprobs.
fn decoded_start() -> DecodedTextEvent {
    let no_prompt_tokens: [u32; 0] = [];
    DecodedTextEvent::Start {
        prompt_token_ids: Arc::<[u32]>::from(no_prompt_tokens),
        prompt_logprobs: None,
    }
}
/// Terminal metadata with zero token counts and an EOS stop reason.
fn finished() -> Finished {
    let finish_reason = FinishReason::stop_eos();
    Finished {
        prompt_token_count: 0,
        output_token_count: 0,
        finish_reason,
        kv_transfer_params: None,
    }
}
async fn collect_events(
processor: HarmonyChatOutputProcessor,
events: Vec<DecodedTextEvent>,
) -> Vec<ChatEvent> {
Box::new(processor)
.process(Box::pin(stream::iter(events.into_iter().map(Ok))))
.unwrap()
.try_collect()
.await
.unwrap()
}
/// Test request with automatic tool choice and a single `get_weather` tool.
fn request_with_tools() -> ChatRequest {
    let parameters = serde_json::json!({
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"]
    });
    let weather_tool = ChatTool {
        name: "get_weather".to_string(),
        description: Some("Get weather".to_string()),
        parameters,
        strict: None,
    };
    ChatRequest {
        tool_choice: ChatToolChoice::Auto,
        tools: vec![weather_tool],
        ..ChatRequest::for_test()
    }
}
/// A `final` message whose last rendered token is missing must still end up
/// as a complete text block in the `Done` event.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn interrupted_final_message_is_preserved() {
    let tokens = completion_tokens(&[text_message("final", "hello")]);
    let all_but_last = tokens[..tokens.len() - 1].to_vec();

    let processor = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids: all_but_last,
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    let expected_done = ChatEvent::Done {
        message: crate::AssistantMessage {
            content: vec![crate::AssistantContentBlock::Text {
                text: "hello".to_string(),
            }],
        },
        prompt_token_count: 0,
        output_token_count: 0,
        finish_reason: FinishReason::stop_eos(),
        kv_transfer_params: None,
    };
    assert_eq!(events.last(), Some(&expected_done));
}
/// Replacing the last token of a `final` "Hi" message with `u32::MAX` must
/// produce "Hi" followed by the Unicode replacement character.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn eos_flush_preserves_trailing_replacement_text() {
    let mut token_ids = completion_tokens(&[text_message("final", "Hi")]);
    token_ids.pop();
    token_ids.push(u32::MAX);

    let processor = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids,
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    let expected_text = format!("Hi{}", char::REPLACEMENT_CHARACTER);
    match events.last().unwrap() {
        ChatEvent::Done { message, .. } => assert_eq!(message.text(), expected_text),
        _ => panic!("expected done"),
    }
}
/// An `analysis` message whose last rendered token is missing must still end
/// up as a complete reasoning block in the `Done` event.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn interrupted_analysis_message_is_preserved() {
    let tokens = completion_tokens(&[text_message("analysis", "think")]);
    let all_but_last = tokens[..tokens.len() - 1].to_vec();

    let processor = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids: all_but_last,
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    let expected_done = ChatEvent::Done {
        message: crate::AssistantMessage {
            content: vec![crate::AssistantContentBlock::Reasoning {
                text: "think".to_string(),
            }],
        },
        prompt_token_count: 0,
        output_token_count: 0,
        finish_reason: FinishReason::stop_eos(),
        kv_transfer_params: None,
    };
    assert_eq!(events.last(), Some(&expected_done));
}
/// A `commentary` preamble followed by a `commentary` tool call must yield
/// only the preamble as message text, plus exactly one tool call.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn commentary_preamble_is_visible_but_commentary_tool_payload_is_not() {
    let preamble = text_message("commentary", "Let me check.");
    let call = tool_message("get_weather", r#"{"city":"Paris"}"#, "commentary");
    let token_ids = completion_tokens(&[preamble, call]);

    let processor = HarmonyChatOutputProcessor::new(&request_with_tools()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids,
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    match events.last().unwrap() {
        ChatEvent::Done { message, .. } => {
            assert_eq!(message.text(), "Let me check.");
            assert_eq!(message.tool_calls().count(), 1);
        }
        _ => panic!("expected done"),
    }
}
/// Consecutive messages of the same kind must be joined with a single newline
/// in both the reasoning and the visible text of the final message.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn multiple_messages_get_newline_separators() {
    let conversation = [
        text_message("analysis", "first think"),
        text_message("analysis", "second think"),
        text_message("final", "first answer"),
        text_message("final", "second answer"),
    ];
    let token_ids = completion_tokens(&conversation);

    let processor = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids,
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    match events.last().unwrap() {
        ChatEvent::Done { message, .. } => {
            assert_eq!(
                message.reasoning().as_deref(),
                Some("first think\nsecond think")
            );
            assert_eq!(message.text(), "first answer\nsecond answer");
        }
        _ => panic!("expected done"),
    }
}
/// A tool call split across two decoded updates must produce a `call_`-prefixed
/// start, argument deltas that concatenate to the full JSON, and an end event
/// carrying the same ID and arguments.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn tool_calls_stream_arguments_and_finish_with_local_id_shape() {
    let call = tool_message("get_weather", r#"{"city":"Paris"}"#, "commentary");
    let tokens = completion_tokens(&[call]);
    let (first_half, second_half) = tokens.split_at(tokens.len() / 2);

    let processor = HarmonyChatOutputProcessor::new(&request_with_tools()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids: first_half.to_vec(),
            logprobs: None,
            finished: None,
        },
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids: second_half.to_vec(),
            logprobs: None,
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    let mut started_id = None;
    let mut streamed_arguments = String::new();
    let mut finished_call = None;
    for event in events.iter() {
        if let ChatEvent::ToolCallStart { id, name, .. } = event {
            assert!(id.starts_with("call_"));
            assert_eq!(name, "get_weather");
            started_id = Some(id.clone());
        } else if let ChatEvent::ToolCallArgumentsDelta { delta, .. } = event {
            streamed_arguments.push_str(delta);
        } else if let ChatEvent::ToolCallEnd { call, .. } = event {
            finished_call = Some(call.clone());
        }
    }

    let start_id = started_id.expect("tool start");
    assert_eq!(streamed_arguments, r#"{"city":"Paris"}"#);
    let end = finished_call.expect("tool end");
    assert_eq!(end.id, start_id);
    assert_eq!(end.arguments, r#"{"city":"Paris"}"#);
}
/// Within one decoded update, the first `BlockDelta` must appear before the
/// first `LogprobsDelta`.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn semantic_events_precede_same_update_logprobs() {
    let token_ids = completion_tokens(&[text_message("final", "hello")]);
    let logprobs = DecodedLogprobs {
        positions: vec![DecodedPositionLogprobs { entries: vec![] }],
    };

    let processor = HarmonyChatOutputProcessor::new(&ChatRequest::for_test()).unwrap();
    let input = vec![
        decoded_start(),
        DecodedTextEvent::TextDelta {
            delta: String::new(),
            token_ids,
            logprobs: Some(logprobs),
            finished: Some(finished()),
        },
    ];
    let events = block_on(collect_events(processor, input));

    // Position of the first event accepted by `is_match`; panics if none is.
    let first_index =
        |is_match: fn(&ChatEvent) -> bool| events.iter().position(is_match).unwrap();
    let block_delta_at = first_index(|event| matches!(event, ChatEvent::BlockDelta { .. }));
    let logprobs_at = first_index(|event| matches!(event, ChatEvent::LogprobsDelta { .. }));
    assert!(block_delta_at < logprobs_at);
}
/// Any non-`Auto` parser selection is rejected, and the error message names
/// which parser kind was overridden and with what.
#[test]
fn rejects_generic_parser_overrides() {
    let auto = ParserSelection::Auto;

    let reasoning_override = ParserSelection::None;
    let reasoning_error =
        validate_harmony_parser_overrides(&auto, &reasoning_override).unwrap_err();
    assert_eq!(
        reasoning_error.to_string(),
        "gpt_oss uses native Harmony output parsing; generic reasoning parser override `none` is not supported"
    );

    let tool_override = ParserSelection::Explicit("json".to_string());
    let tool_error = validate_harmony_parser_overrides(&tool_override, &auto).unwrap_err();
    assert_eq!(
        tool_error.to_string(),
        "gpt_oss uses native Harmony output parsing; generic tool parser override `json` is not supported"
    );
}
/// With both selections left on `Auto`, validation passes and the processor
/// can be constructed.
#[test]
#[file_serial(harmony_tiktoken_cache)]
fn allows_auto_auto_only() {
    let auto = ParserSelection::Auto;
    validate_harmony_parser_overrides(&auto, &auto).unwrap();

    let request = ChatRequest::for_test();
    let _ = HarmonyChatOutputProcessor::new(&request).unwrap();
}
+135
View File
@@ -0,0 +1,135 @@
use std::pin::Pin;
use std::sync::Arc;
use futures::Stream;
use subenum::subenum;
use trait_set::trait_set;
use uuid::Uuid;
use vllm_text::output::{DecodedLogprobs, DecodedPromptLogprobs, DecodedTextEvent};
use crate::FinishReason;
use crate::error::Result;
use crate::event::{AssistantBlockKind, ChatEvent};
mod default;
mod harmony;
mod structured;
pub use default::DefaultChatOutputProcessor;
pub use harmony::HarmonyChatOutputProcessor;
pub(crate) use harmony::validate_harmony_parser_overrides;
/// Internal assistant event before final assembly.
///
/// - [`ContentEvent`]: subenum after reasoning parsing, carries only text content.
/// - [`AssistantEvent`]: full event after tool parsing, adds tool-call variants.
#[subenum(ContentEvent)]
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum AssistantEvent {
/// Start of a response, carrying the prompt token IDs and optional prompt
/// logprobs.
#[subenum(ContentEvent)]
Start {
prompt_token_ids: Arc<[u32]>,
prompt_logprobs: Option<DecodedPromptLogprobs>,
},
/// A piece of text belonging to a block of the given kind.
#[subenum(ContentEvent)]
TextDelta {
kind: AssistantBlockKind,
delta: String,
},
/// Per-decoded-update sample metadata: logprobs and/or output token IDs.
#[subenum(ContentEvent)]
LogprobsDelta {
logprobs: Option<DecodedLogprobs>,
token_ids: Vec<u32>,
},
/// The start of a new tool call, with its declared name and generated ID.
ToolCallStart { id: String, name: String },
/// A delta for the arguments of the currently open tool call. Must follow a
/// `ToolCallStart`.
ToolCallArgumentsDelta { delta: String },
/// Terminal event with token counts and the finish reason.
#[subenum(ContentEvent)]
Done {
prompt_token_count: usize,
output_token_count: usize,
finish_reason: FinishReason,
/// Connector-specific KV transfer parameters for disaggregated serving.
kv_transfer_params: Option<serde_json::Value>,
},
}
impl ContentEvent {
    /// Translate one [`DecodedTextEvent`] into [`ContentEvent`] values,
    /// treating all text as plain (non-reasoning) content.
    ///
    /// A `Start` maps to a single `Start`. A `TextDelta` maps, in this order,
    /// to: a `TextDelta` if the text is non-empty, a `LogprobsDelta` if there
    /// are logprobs or token IDs, and a `Done` if the update is final.
    fn from_decoded_plain_text(event: DecodedTextEvent) -> Vec<Self> {
        let (delta, token_ids, logprobs, finished) = match event {
            DecodedTextEvent::Start {
                prompt_token_ids,
                prompt_logprobs,
            } => {
                return vec![Self::Start {
                    prompt_token_ids,
                    prompt_logprobs,
                }];
            }
            DecodedTextEvent::TextDelta {
                delta,
                token_ids,
                logprobs,
                finished,
            } => (delta, token_ids, logprobs, finished),
        };

        let text_event = (!delta.is_empty()).then(|| Self::TextDelta {
            kind: AssistantBlockKind::Text,
            delta,
        });
        let has_sample_metadata = logprobs.is_some() || !token_ids.is_empty();
        let sample_event = has_sample_metadata.then(|| Self::LogprobsDelta {
            logprobs,
            token_ids,
        });
        let done_event = finished.map(|finished| Self::Done {
            prompt_token_count: finished.prompt_token_count,
            output_token_count: finished.output_token_count,
            finish_reason: finished.finish_reason,
            kv_transfer_params: finished.kv_transfer_params,
        });

        text_event
            .into_iter()
            .chain(sample_event)
            .chain(done_event)
            .collect()
    }
}
/// Boxed stream of decoded text events coming from [`vllm_text`].
///
/// Pinned, heap-allocated, `Send` stream whose items are
/// `Result<DecodedTextEvent>`.
pub type DynDecodedTextEventStream = Pin<Box<dyn Stream<Item = Result<DecodedTextEvent>> + Send>>;
/// Boxed stream of structured chat events exposed by [`crate::ChatLlm`].
///
/// Pinned, heap-allocated, `Send` stream whose items are `Result<ChatEvent>`.
pub type DynChatEventStream = Pin<Box<dyn Stream<Item = Result<ChatEvent>> + Send>>;
/// Request-scoped output processor from decoded text events into structured
/// chat events.
///
/// `process` takes `self` by `Box`, so a processor instance is consumed by the
/// one stream it transforms.
pub trait ChatOutputProcessor: Send {
/// Consume decoded text stream and return the structured chat-event stream.
fn process(self: Box<Self>, decoded: DynDecodedTextEventStream) -> Result<DynChatEventStream>;
}
/// Trait-object form of [`ChatOutputProcessor`]: a boxed processor whose
/// concrete type is erased.
pub type DynChatOutputProcessor = Box<dyn ChatOutputProcessor>;
trait_set! {
/// Trait alias: any `Send + 'static` stream of `Result<DecodedTextEvent>`
/// (decoded text updates).
pub(crate) trait DecodedTextEventStream = Stream<Item = Result<DecodedTextEvent>> + Send + 'static;
/// Trait alias: any `Send + 'static` stream of `Result<AssistantEvent>`
/// (internal assistant events).
pub(crate) trait AssistantEventStream = Stream<Item = Result<AssistantEvent>> + Send + 'static;
/// Trait alias: any `Send + 'static` stream of `Result<ChatEvent>`
/// (public chat events).
pub(crate) trait ChatEventStream = Stream<Item = Result<ChatEvent>> + Send + 'static;
}
/// Generate a northbound tool-call ID in the OpenAI-style `call_<id>` format.
///
/// The `<id>` part is the first 24 characters of a fresh v4 UUID in its
/// "simple" string form.
// TODO: support other ID schemes, such as Kimi-K2's
// `functions.{name}:{global_index}`.
pub(crate) fn generate_tool_call_id() -> String {
    let uuid_text = Uuid::new_v4().simple().to_string();
    let suffix = &uuid_text[..24];
    format!("call_{suffix}")
}
+508
View File
@@ -0,0 +1,508 @@
//! Adapts parsed assistant updates into structured chat events.
//!
//! This module remains the final assembly stage in `vllm-chat`. Token-to-text
//! decoding still lives in `decoded.rs`, while reasoning separation and tool
//! parsing are handled earlier by their own adapters. This stage consumes those
//! parsed deltas and assembles higher-level assistant content blocks.
use asynk_strim_attr::{TryYielder, try_stream};
use futures::{StreamExt as _, pin_mut};
use vllm_text::DecodedLogprobs;
use super::{AssistantEvent, AssistantEventStream};
use crate::error::Error;
use crate::event::{
AssistantBlockKind, AssistantContentBlock, AssistantMessage, AssistantToolCall, ChatEvent,
};
use crate::{FinishReason, Result};
/// One currently open assistant text-like block being assembled from streamed
/// deltas.
///
/// Held in `StructuredEventState::open_text_block` while the block is open.
struct OpenTextBlock {
/// Stable position of this block in the final assistant message.
index: usize,
/// Semantic kind of the block being assembled.
kind: AssistantBlockKind,
/// Accumulated text payload for the block.
text: String,
}
/// One currently open assistant tool call being assembled from streamed deltas.
///
/// Created by `start_tool_call` with empty arguments; `push_tool_call_arguments`
/// appends to `arguments`.
struct OpenToolCall {
/// Stable ordinal of this tool call in the assistant tool-call list.
index: usize,
/// Stable tool-call ID exposed northbound.
id: String,
/// Function name.
name: String,
/// Incremental JSON arguments accumulated so far.
arguments: String,
}
/// Per-stream block assembly state.
///
/// The adapter maintains at most one open text block and one open tool call,
/// and appends deltas to them until the semantic kind changes or the stream
/// terminates.
struct StructuredEventState {
/// Final assistant message assembled so far.
message: AssistantMessage,
/// Currently open text or reasoning block, if any.
open_text_block: Option<OpenTextBlock>,
/// Currently open tool call, if any.
open_tool_call: Option<OpenToolCall>,
/// Next OpenAI-compatible tool-call ordinal.
/// Starts at 0 and is incremented each time a tool call is started.
next_tool_call_index: usize,
}
impl StructuredEventState {
/// Build an empty assembly state: default message, nothing open, and the
/// tool-call ordinal at zero.
fn new() -> Self {
    let message = AssistantMessage::default();
    Self {
        message,
        open_text_block: None,
        open_tool_call: None,
        next_tool_call_index: 0,
    }
}
/// Turn one parsed text delta into zero or more structured chat events.
///
/// Any open tool call is closed first, then the delta is appended.
fn process_text_delta(
    &mut self,
    kind: AssistantBlockKind,
    delta: String,
) -> Result<Vec<ChatEvent>> {
    let mut emitted = Vec::new();
    self.close_open_tool_call(&mut emitted);
    self.push_text_delta(kind, delta, &mut emitted);
    Ok(emitted)
}
/// Pass per-update sample metadata through as a single
/// `ChatEvent::LogprobsDelta`, without touching any open block.
fn process_logprobs_delta(
    &mut self,
    logprobs: Option<DecodedLogprobs>,
    token_ids: Vec<u32>,
) -> Result<Vec<ChatEvent>> {
    let forwarded = ChatEvent::LogprobsDelta {
        logprobs,
        token_ids,
    };
    Ok(vec![forwarded])
}
/// Start one new tool call, closing any incompatible open block first.
fn start_tool_call(&mut self, id: String, name: String) -> Result<Vec<ChatEvent>> {
let mut events = Vec::new();
self.close_open_text_block(&mut events);
self.close_open_tool_call(&mut events);
let index = self.next_tool_call_index;
self.next_tool_call_index += 1;
self.open_tool_call = Some(OpenToolCall {
index,
id: id.clone(),
name: name.clone(),
arguments: String::new(),
});
events.push(ChatEvent::ToolCallStart { index, id, name });
Ok(events)
}
/// Append one incremental tool-call arguments delta.
fn push_tool_call_arguments(&mut self, delta: String) -> Result<Vec<ChatEvent>> {
let mut events = Vec::new();
let Some(open_tool_call) = self.open_tool_call.as_mut() else {
return Err(Error::ToolCallStreamInvariant {
message: "received tool-call arguments delta without an open tool call".to_string(),
});
};
open_tool_call.arguments.push_str(&delta);
events.push(ChatEvent::ToolCallArgumentsDelta {
index: open_tool_call.index,
delta,
});
Ok(events)
}
/// Close any open block and emit the terminal `Done` event.
fn finish(
&mut self,
prompt_token_count: usize,
output_token_count: usize,
finish_reason: FinishReason,
kv_transfer_params: Option<serde_json::Value>,
) -> Result<Vec<ChatEvent>> {
let mut events = Vec::new();
self.close_open_text_block(&mut events);
self.close_open_tool_call(&mut events);
events.push(ChatEvent::Done {
message: self.message.clone(),
prompt_token_count,
output_token_count,
finish_reason,
kv_transfer_params,
});
Ok(events)
}
/// Append one semantic text delta to the current block, or open a new block
/// when the semantic kind changes.
fn push_text_delta(
&mut self,
kind: AssistantBlockKind,
delta: String,
events: &mut Vec<ChatEvent>,
) {
if delta.is_empty() {
return;
}
match self.open_text_block.as_mut() {
// If there's a currently open block of the same kind, append to it.
Some(open_block) if open_block.kind == kind => {
open_block.text.push_str(&delta);
events.push(ChatEvent::BlockDelta {
index: open_block.index,
kind,
delta,
});
}
// Otherwise, close the currently open block (if any) and start a
// new one.
_ => {
self.close_open_text_block(events);
let index = self.message.content.len();
self.open_text_block = Some(OpenTextBlock {
index,
kind,
text: delta.clone(),
});
events.push(ChatEvent::BlockStart { index, kind });
events.push(ChatEvent::BlockDelta { index, kind, delta });
}
}
}
/// Finalize the currently open text block, if present.
fn close_open_text_block(&mut self, events: &mut Vec<ChatEvent>) {
let Some(open_block) = self.open_text_block.take() else {
return;
};
let block = match open_block.kind {
AssistantBlockKind::Text => AssistantContentBlock::Text {
text: open_block.text,
},
AssistantBlockKind::Reasoning => AssistantContentBlock::Reasoning {
text: open_block.text,
},
AssistantBlockKind::ToolCall => {
unreachable!("tool calls must not be assembled as text blocks")
}
};
self.message.push_block(block.clone());
events.push(ChatEvent::BlockEnd {
index: open_block.index,
block,
});
}
/// Finalize the currently open tool call, if present.
fn close_open_tool_call(&mut self, events: &mut Vec<ChatEvent>) {
let Some(open_tool_call) = self.open_tool_call.take() else {
return;
};
let call = AssistantToolCall {
id: open_tool_call.id,
name: open_tool_call.name,
arguments: open_tool_call.arguments,
};
self.message.push_block(AssistantContentBlock::ToolCall(call.clone()));
events.push(ChatEvent::ToolCallEnd {
index: open_tool_call.index,
call,
});
}
}
/// Adapt a parsed assistant event stream into the public structured chat
/// event stream.
///
/// Every incoming `AssistantEvent` is translated into a batch of `ChatEvent`s
/// (via `StructuredEventState`, except for `Start`, which maps one-to-one) and
/// the batch is yielded in order. The first error from the input stream or
/// from the state machine ends the stream.
#[try_stream]
pub(crate) async fn structured_chat_event_stream(
    stream: impl AssistantEventStream,
    mut y: TryYielder<ChatEvent, Error>,
) -> Result<()> {
    pin_mut!(stream);
    let mut state = StructuredEventState::new();

    loop {
        let Some(event) = stream.next().await.transpose()? else {
            break;
        };

        // Translate first, then yield everything through one shared loop.
        let outgoing = match event {
            AssistantEvent::Start {
                prompt_token_ids,
                prompt_logprobs,
            } => vec![ChatEvent::Start {
                prompt_token_ids,
                prompt_logprobs,
            }],
            AssistantEvent::TextDelta { kind, delta } => state.process_text_delta(kind, delta)?,
            AssistantEvent::LogprobsDelta {
                logprobs,
                token_ids,
            } => state.process_logprobs_delta(logprobs, token_ids)?,
            AssistantEvent::ToolCallStart { id, name } => state.start_tool_call(id, name)?,
            AssistantEvent::ToolCallArgumentsDelta { delta } => {
                state.push_tool_call_arguments(delta)?
            }
            AssistantEvent::Done {
                prompt_token_count,
                output_token_count,
                finish_reason,
                kv_transfer_params,
            } => state.finish(
                prompt_token_count,
                output_token_count,
                finish_reason,
                kv_transfer_params,
            )?,
        };

        for chat_event in outgoing {
            y.yield_ok(chat_event).await;
        }
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use futures::{StreamExt as _, stream};

    use super::structured_chat_event_stream;
    use crate::FinishReason;
    use crate::error::Error;
    use crate::event::{AssistantBlockKind, AssistantMessageExt as _, ChatEvent};
    use crate::output::AssistantEvent;

    /// Arguments payload shared by the weather tool-call scenarios.
    const PARIS_ARGS: &str = r#"{"city":"Paris"}"#;

    /// Build a `ToolCallStart` input event.
    fn tool_start(id: &str, name: &str) -> AssistantEvent {
        AssistantEvent::ToolCallStart {
            id: id.to_string(),
            name: name.to_string(),
        }
    }

    /// Build a `ToolCallArgumentsDelta` input event.
    fn tool_args(delta: &str) -> AssistantEvent {
        AssistantEvent::ToolCallArgumentsDelta {
            delta: delta.to_string(),
        }
    }

    /// Build a plain-text `TextDelta` input event.
    fn text(delta: &str) -> AssistantEvent {
        AssistantEvent::TextDelta {
            kind: AssistantBlockKind::Text,
            delta: delta.to_string(),
        }
    }

    /// Build the terminal `Done` input event used by every scenario.
    fn done() -> AssistantEvent {
        AssistantEvent::Done {
            prompt_token_count: 1,
            output_token_count: 1,
            finish_reason: FinishReason::stop_eos(),
            kv_transfer_params: None,
        }
    }

    /// Feed `input` through the adapter and return every emitted event,
    /// panicking if the adapter reports an error.
    async fn run_ok(input: Vec<AssistantEvent>) -> Vec<ChatEvent> {
        structured_chat_event_stream(stream::iter(input.into_iter().map(Ok)))
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .collect::<crate::Result<Vec<_>>>()
            .unwrap()
    }

    #[tokio::test]
    async fn structured_stream_closes_tool_call_on_done() {
        let events = run_ok(vec![
            tool_start("call_1", "get_weather"),
            tool_args(PARIS_ARGS),
            done(),
        ])
        .await;

        assert!(matches!(events[0], ChatEvent::ToolCallStart { .. }));
        assert!(matches!(
            events[1],
            ChatEvent::ToolCallArgumentsDelta { .. }
        ));

        let ChatEvent::ToolCallEnd { call, .. } = &events[2] else {
            panic!("expected tool call end");
        };
        assert_eq!(call.name, "get_weather");
        assert_eq!(call.arguments, PARIS_ARGS);

        let ChatEvent::Done { message, .. } = &events[3] else {
            panic!("expected done");
        };
        let tool_calls = message.tool_calls().collect::<Vec<_>>();
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].id, "call_1");
        assert_eq!(tool_calls[0].arguments, PARIS_ARGS);
    }

    #[tokio::test]
    async fn structured_stream_closes_previous_tool_call_on_next_start() {
        let events = run_ok(vec![
            tool_start("call_1", "first"),
            tool_args(r#"{"a":1}"#),
            tool_start("call_2", "second"),
            tool_args(r#"{"b":2}"#),
            done(),
        ])
        .await;

        assert!(matches!(events[0], ChatEvent::ToolCallStart { .. }));
        assert!(matches!(
            events[1],
            ChatEvent::ToolCallArgumentsDelta { .. }
        ));

        let ChatEvent::ToolCallEnd { call, .. } = &events[2] else {
            panic!("expected first tool call end");
        };
        assert_eq!(call.name, "first");
        assert!(matches!(events[3], ChatEvent::ToolCallStart { .. }));

        let ChatEvent::Done { message, .. } = &events[6] else {
            panic!("expected done");
        };
        let tool_calls = message.tool_calls().collect::<Vec<_>>();
        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].name, "first");
        assert_eq!(tool_calls[1].name, "second");
    }

    #[tokio::test]
    async fn structured_stream_numbers_tool_calls_independent_of_text_blocks() {
        let events = run_ok(vec![
            text("before"),
            tool_start("call_1", "get_weather"),
            tool_args(PARIS_ARGS),
            done(),
        ])
        .await;

        assert!(matches!(
            events[0],
            ChatEvent::BlockStart {
                index: 0,
                kind: AssistantBlockKind::Text,
            }
        ));
        assert!(matches!(events[2], ChatEvent::BlockEnd { index: 0, .. }));
        assert!(matches!(
            events[3],
            ChatEvent::ToolCallStart { index: 0, .. }
        ));
        assert!(matches!(
            events[4],
            ChatEvent::ToolCallArgumentsDelta { index: 0, .. }
        ));
        assert!(matches!(events[5], ChatEvent::ToolCallEnd { index: 0, .. }));
    }

    #[tokio::test]
    async fn structured_stream_closes_tool_call_before_text() {
        let events = run_ok(vec![
            tool_start("call_1", "get_weather"),
            tool_args(PARIS_ARGS),
            text("done"),
            done(),
        ])
        .await;

        assert!(matches!(events[2], ChatEvent::ToolCallEnd { .. }));
        assert!(matches!(
            events[3],
            ChatEvent::BlockStart {
                kind: AssistantBlockKind::Text,
                ..
            }
        ));

        let ChatEvent::Done { message, .. } = &events[6] else {
            panic!("expected done");
        };
        assert_eq!(message.text(), "done");
        assert_eq!(message.tool_calls().count(), 1);
    }

    #[tokio::test]
    async fn structured_stream_rejects_arguments_without_open_tool_call() {
        let input = stream::iter(vec![Ok(tool_args("{}"))]);
        let emitted = structured_chat_event_stream(input).collect::<Vec<_>>().await;

        let err = emitted
            .into_iter()
            .next()
            .expect("expected one event")
            .expect_err("expected invariant error");
        assert!(matches!(err, Error::ToolCallStreamInvariant { .. }));
    }
}
+107
View File
@@ -0,0 +1,107 @@
pub mod reasoning;
pub mod tool;
use std::collections::HashMap;
use std::convert::Infallible;
use std::fmt;
use std::str::FromStr;
use serde_with::DeserializeFromStr;
/// Specify which reasoning or tool-call parser implementation to use.
///
/// The textual forms are `"auto"` and `"none"` (matched case-insensitively by
/// the `FromStr` impl); any other string is kept verbatim as an explicit parser
/// name. The `DeserializeFromStr` derive is expected to route deserialization
/// through that `FromStr` impl (see the `serde_with` documentation).
#[derive(Debug, Clone, PartialEq, Eq, Default, DeserializeFromStr)]
pub enum ParserSelection {
/// Use model-based auto-detection. This is the `Default` variant.
#[default]
Auto,
/// Disable the parser entirely.
None,
/// Force one specific parser implementation by name.
Explicit(String),
}
/// Canonical textual spellings of the non-explicit selections.
impl ParserSelection {
/// Text form of [`ParserSelection::Auto`], used by both `FromStr` and `Display`.
pub const AUTO_LITERAL: &str = "auto";
/// Text form of [`ParserSelection::None`], used by both `FromStr` and `Display`.
pub const NONE_LITERAL: &str = "none";
}
/// Parsing never fails: the two reserved literals are recognised ignoring
/// ASCII case, and every other input becomes an explicit parser name.
impl FromStr for ParserSelection {
    type Err = Infallible;

    fn from_str(value: &str) -> Result<Self, Self::Err> {
        if value.eq_ignore_ascii_case(Self::AUTO_LITERAL) {
            return Ok(Self::Auto);
        }
        if value.eq_ignore_ascii_case(Self::NONE_LITERAL) {
            return Ok(Self::None);
        }
        // Anything else is preserved exactly as written.
        Ok(Self::Explicit(value.to_owned()))
    }
}
/// Renders the selection in the same textual form that `FromStr` accepts:
/// the lowercase reserved literal, or the explicit parser name unchanged.
impl fmt::Display for ParserSelection {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered: &str = match self {
            Self::Auto => Self::AUTO_LITERAL,
            Self::None => Self::NONE_LITERAL,
            Self::Explicit(name) => name.as_str(),
        };
        f.write_str(rendered)
    }
}
/// Registry and model matcher for reasoning and tool parsers.
///
/// `C` is the creator (constructor) type stored per parser name. Model
/// matching is a separate, ordered list of lowercase substrings, each mapped
/// to a parser name.
#[derive(Clone)]
pub struct ParserFactory<C> {
    /// Creator for each exactly-named parser.
    creators: HashMap<String, C>,
    /// `(lowercased pattern, parser name)` pairs in registration order.
    patterns: Vec<(String, String)>,
}

impl<C> Default for ParserFactory<C> {
    /// An empty registry: no creators and no model patterns.
    fn default() -> Self {
        let creators = HashMap::new();
        let patterns = Vec::new();
        Self { creators, patterns }
    }
}

impl<C> ParserFactory<C> {
    /// Register a creator for a parser by an exact name.
    ///
    /// Registering the same name again replaces the earlier creator.
    pub fn register_creator(&mut self, name: &str, creator: C) -> &mut Self {
        let key = name.to_string();
        self.creators.insert(key, creator);
        self
    }

    /// Add a case-insensitive substring match from model ID to parser name.
    ///
    /// The pattern is lowercased once here; lookups lowercase the model ID.
    pub fn register_pattern(&mut self, pattern: &str, parser_name: &str) -> &mut Self {
        let entry = (pattern.to_lowercase(), parser_name.to_string());
        self.patterns.push(entry);
        self
    }

    /// Return the first registered parser name matching the given model ID.
    ///
    /// Patterns are tried in registration order, so more specific patterns
    /// have to be registered before broader ones.
    pub fn resolve_name_for_model(&self, model_id: &str) -> Option<&str> {
        let lowered_id = model_id.to_lowercase();
        for (pattern, parser_name) in &self.patterns {
            if lowered_id.contains(pattern.as_str()) {
                return Some(parser_name.as_str());
            }
        }
        None
    }

    /// Return true if the exact parser name is registered.
    pub fn contains(&self, name: &str) -> bool {
        self.creators.get(name).is_some()
    }

    /// Return all registered parser names sorted for stable display.
    pub fn list(&self) -> Vec<String> {
        let mut names = Vec::with_capacity(self.creators.len());
        names.extend(self.creators.keys().cloned());
        names.sort_unstable();
        names
    }

    /// Get the constructor for a parser by its exact registered name, or return
    /// None if not found.
    pub fn creator(&self, name: &str) -> Option<&C> {
        self.creators.get(name)
    }
}
+120
View File
@@ -0,0 +1,120 @@
//! Reasoning parser registration and selection boundary for `vllm-chat`.
use std::sync::LazyLock;
pub use vllm_reasoning_parser::{
CohereCmdReasoningParser, DeepSeekR1ReasoningParser, DeepSeekV3ReasoningParser,
DeepSeekV4ReasoningParser, Gemma4ReasoningParser, Glm45ReasoningParser, KimiK2ReasoningParser,
KimiReasoningParser, MiniMaxM2ReasoningParser, NemotronV3ReasoningParser, Qwen3ReasoningParser,
ReasoningDelta, ReasoningError, ReasoningParser, Step3ReasoningParser,
};
use vllm_tokenizer::DynTokenizer;
use crate::parser::ParserFactory;
/// Canonical public names for registered reasoning parsers.
///
/// Each constant is the exact key under which `ReasoningParserFactory::new`
/// registers the corresponding parser, and therefore the string accepted by
/// `ReasoningParserFactory::create`.
pub mod names {
pub const COHERE_CMD: &str = "cohere_cmd";
pub const DEEPSEEK_R1: &str = "deepseek_r1";
pub const DEEPSEEK_V3: &str = "deepseek_v3";
pub const DEEPSEEK_V4: &str = "deepseek_v4";
pub const GEMMA4: &str = "gemma4";
pub const GLM45: &str = "glm45";
pub const KIMI: &str = "kimi";
pub const KIMI_K2: &str = "kimi_k2";
pub const MINIMAX_M2: &str = "minimax_m2";
pub const NEMOTRON_V3: &str = "nemotron_v3";
pub const QWEN3: &str = "qwen3";
pub const STEP3: &str = "step3";
}
/// Constructor signature for one registered reasoning parser implementation.
///
/// A plain function pointer: it takes the tokenizer and returns a boxed parser,
/// or the reasoning-parser crate's own error.
type ReasoningParserCreator =
fn(DynTokenizer) -> vllm_reasoning_parser::Result<Box<dyn ReasoningParser>>;
/// Registry and model matcher for reasoning parsers.
///
/// This is the generic `ParserFactory` specialised to `ReasoningParserCreator`.
pub type ReasoningParserFactory = ParserFactory<ReasoningParserCreator>;
impl ReasoningParserFactory {
/// Get the global reasoning parser factory with built-in registrations and
/// model mappings.
pub fn global() -> &'static Self {
static INSTANCE: LazyLock<ReasoningParserFactory> =
LazyLock::new(ReasoningParserFactory::new);
&INSTANCE
}
/// Create the default registry with built-in parser names and model
/// mappings.
pub fn new() -> Self {
let mut factory = Self::default();
factory
.register_parser::<CohereCmdReasoningParser>(names::COHERE_CMD)
.register_parser::<DeepSeekR1ReasoningParser>(names::DEEPSEEK_R1)
.register_parser::<DeepSeekV3ReasoningParser>(names::DEEPSEEK_V3)
.register_parser::<DeepSeekV4ReasoningParser>(names::DEEPSEEK_V4)
.register_parser::<Gemma4ReasoningParser>(names::GEMMA4)
.register_parser::<Glm45ReasoningParser>(names::GLM45)
.register_parser::<KimiReasoningParser>(names::KIMI)
.register_parser::<KimiK2ReasoningParser>(names::KIMI_K2)
.register_parser::<MiniMaxM2ReasoningParser>(names::MINIMAX_M2)
.register_parser::<NemotronV3ReasoningParser>(names::NEMOTRON_V3)
.register_parser::<Qwen3ReasoningParser>(names::QWEN3)
.register_parser::<Step3ReasoningParser>(names::STEP3);
factory
.register_pattern("deepseek-r1", names::DEEPSEEK_R1)
.register_pattern("deepseek-v4", names::DEEPSEEK_V4)
.register_pattern("deepseek_v4", names::DEEPSEEK_V4)
.register_pattern("deepseek-v3", names::DEEPSEEK_V3)
.register_pattern("gemma-4", names::GEMMA4)
.register_pattern("gemma4", names::GEMMA4)
.register_pattern("qwen", names::QWEN3)
.register_pattern("glm-5", names::GLM45)
.register_pattern("glm-4.7", names::GLM45)
.register_pattern("glm-4.6", names::GLM45)
.register_pattern("glm-4.5", names::GLM45)
.register_pattern("kimi-k2", names::KIMI_K2)
.register_pattern("kimi", names::KIMI)
.register_pattern("step3", names::STEP3)
.register_pattern("minimax", names::MINIMAX_M2)
.register_pattern("mm-m2", names::MINIMAX_M2)
.register_pattern("cohere", names::COHERE_CMD)
.register_pattern("command", names::COHERE_CMD)
.register_pattern("nano", names::NEMOTRON_V3)
.register_pattern("nemotron", names::NEMOTRON_V3);
factory
}
/// Register one parser type that exposes a static `create()` constructor.
pub fn register_parser<T>(&mut self, name: &str) -> &mut Self
where
T: ReasoningParser + 'static,
{
self.register_creator(name, T::create)
}
/// Construct a parser from an exact name.
pub fn create(
&self,
name: &str,
tokenizer: DynTokenizer,
) -> crate::Result<Box<dyn ReasoningParser>> {
let creator = self.creator(name).ok_or_else(|| crate::Error::ParserUnavailableByName {
kind: "reasoning",
name: name.to_string(),
available_names: self.list(),
})?;
creator(tokenizer).map_err(|error| crate::Error::ParserInitialization {
kind: "reasoning",
name: name.to_string(),
error: error.into(),
})
}
}
#[cfg(test)]
mod tests;
@@ -0,0 +1,61 @@
use std::sync::Arc;
use vllm_tokenizer::Tokenizer;
use super::{ReasoningParserFactory, names};
/// Test-only tokenizer that maps every `char` to its Unicode scalar value and
/// back, and knows no named tokens.
struct FakeTokenizer;

impl Tokenizer for FakeTokenizer {
    /// One token per character: the character's scalar value.
    fn encode(&self, text: &str, _add_special_tokens: bool) -> vllm_tokenizer::Result<Vec<u32>> {
        let mut ids = Vec::new();
        for ch in text.chars() {
            ids.push(u32::from(ch));
        }
        Ok(ids)
    }

    /// Inverse of `encode`; IDs that are not valid scalar values decode to
    /// U+FFFD.
    fn decode(
        &self,
        token_ids: &[u32],
        _skip_special_tokens: bool,
    ) -> vllm_tokenizer::Result<String> {
        let mut text = String::new();
        for &token_id in token_ids {
            text.push(char::from_u32(token_id).unwrap_or('\u{FFFD}'));
        }
        Ok(text)
    }

    /// No token has a dedicated ID in this fake.
    fn token_to_id(&self, _token: &str) -> Option<u32> {
        None
    }
}
/// The default registry must expose built-in parsers through both `contains`
/// and `list`.
#[test]
fn factory_contains_and_lists_registered_parsers() {
    let factory = ReasoningParserFactory::new();
    let listed = factory.list();

    for name in [names::QWEN3, names::DEEPSEEK_V4] {
        assert!(factory.contains(name));
        assert!(listed.contains(&name.to_string()));
    }
}
/// DeepSeek V4 model IDs, in either spelling, must resolve to the dedicated
/// `deepseek_v4` parser.
///
/// NOTE: the `to_qwen3_alias` suffix in the test name does not match what is
/// asserted here; the expected parser is `names::DEEPSEEK_V4`.
#[test]
fn factory_resolves_deepseek_v4_to_qwen3_alias() {
    let factory = ReasoningParserFactory::new();

    for model_id in ["deepseek-ai/DeepSeek-V4", "deepseek_v4"] {
        assert_eq!(
            factory.resolve_name_for_model(model_id),
            Some(names::DEEPSEEK_V4)
        );
    }
}
/// Creating a parser under an unregistered name must fail, and the rendered
/// error must contain the phrase "choose from".
#[test]
fn factory_rejects_unknown_parser_names() {
    let factory = ReasoningParserFactory::new();
    let tokenizer = Arc::new(FakeTokenizer);

    let Err(error) = factory.create("missing", tokenizer) else {
        panic!("expected parser lookup to fail");
    };

    assert!(error.to_string().contains("choose from"));
}
+140
View File
@@ -0,0 +1,140 @@
//! Tool parser registration and selection boundary for `vllm-chat`.
use std::sync::LazyLock;
pub use vllm_tool_parser::{
DeepSeekV3ToolParser, DeepSeekV4ToolParser, DeepSeekV31ToolParser, DeepSeekV32ToolParser,
Gemma4ToolParser, Glm45MoeToolParser, Glm47MoeToolParser, HermesToolParser, KimiK2ToolParser,
Llama3JsonToolParser, MinimaxM2ToolParser, MistralToolParser, Qwen3CoderToolParser,
Qwen3XmlToolParser, ToolCallDelta, ToolParseResult, ToolParser, ToolParserError,
};
use crate::parser::ParserFactory;
use crate::request::ChatTool;
/// Canonical public names for registered tool parsers.
///
/// Each constant is the exact key under which `ToolParserFactory::new`
/// registers a parser, and therefore the string accepted by
/// `ToolParserFactory::create`.
pub mod names {
pub const DEEPSEEK_V3: &str = "deepseek_v3";
pub const DEEPSEEK_V31: &str = "deepseek_v31";
pub const DEEPSEEK_V32: &str = "deepseek_v32";
pub const DEEPSEEK_V4: &str = "deepseek_v4";
pub const GLM45: &str = "glm45";
pub const GLM47: &str = "glm47";
pub const GEMMA4: &str = "gemma4";
pub const HERMES: &str = "hermes";
pub const KIMI_K2: &str = "kimi_k2";
pub const LLAMA3_JSON: &str = "llama3_json";
// Registered with the same `Llama3JsonToolParser` implementation as
// `LLAMA3_JSON` in `ToolParserFactory::new`.
pub const LLAMA4_JSON: &str = "llama4_json";
pub const MINIMAX_M2: &str = "minimax_m2";
pub const MISTRAL: &str = "mistral";
pub const QWEN3_CODER: &str = "qwen3_coder";
pub const QWEN3_XML: &str = "qwen3_xml";
}
/// Constructor signature for one registered tool parser implementation.
///
/// A plain function pointer: it takes the request's tool definitions and
/// returns a boxed parser, or the tool-parser crate's own error.
type ToolParserCreator = fn(&[ChatTool]) -> vllm_tool_parser::Result<Box<dyn ToolParser>>;
/// Registry and model matcher for tool parsers.
///
/// This is the generic `ParserFactory` specialised to `ToolParserCreator`.
pub type ToolParserFactory = ParserFactory<ToolParserCreator>;
impl ToolParserFactory {
/// Get the global tool parser factory with built-in registrations and model
/// mappings.
pub fn global() -> &'static Self {
static INSTANCE: LazyLock<ToolParserFactory> = LazyLock::new(ToolParserFactory::new);
&INSTANCE
}
/// Create the default registry with built-in parser names and model
/// mappings.
pub fn new() -> Self {
let mut factory = Self::default();
factory
.register_parser::<DeepSeekV3ToolParser>(names::DEEPSEEK_V3)
.register_parser::<DeepSeekV31ToolParser>(names::DEEPSEEK_V31)
.register_parser::<DeepSeekV32ToolParser>(names::DEEPSEEK_V32)
.register_parser::<DeepSeekV4ToolParser>(names::DEEPSEEK_V4)
.register_parser::<Glm45MoeToolParser>(names::GLM45)
.register_parser::<Glm47MoeToolParser>(names::GLM47)
.register_parser::<Gemma4ToolParser>(names::GEMMA4)
.register_parser::<HermesToolParser>(names::HERMES)
.register_parser::<KimiK2ToolParser>(names::KIMI_K2)
.register_parser::<Llama3JsonToolParser>(names::LLAMA3_JSON)
.register_parser::<Llama3JsonToolParser>(names::LLAMA4_JSON)
.register_parser::<MinimaxM2ToolParser>(names::MINIMAX_M2)
.register_parser::<MistralToolParser>(names::MISTRAL)
.register_parser::<Qwen3XmlToolParser>(names::QWEN3_XML)
.register_parser::<Qwen3CoderToolParser>(names::QWEN3_CODER);
factory
.register_pattern("mistral-", names::MISTRAL)
.register_pattern("mixtral-", names::MISTRAL)
.register_pattern("qwen3-coder", names::QWEN3_CODER)
.register_pattern("qwen2.5-coder", names::QWEN3_CODER)
.register_pattern("qwen3.5", names::QWEN3_CODER)
.register_pattern("qwen", names::QWEN3_XML)
.register_pattern("hermes", names::HERMES)
.register_pattern("llama-4", names::LLAMA4_JSON)
.register_pattern("llama-3.2", names::LLAMA3_JSON)
.register_pattern("llama-3.1", names::LLAMA3_JSON)
.register_pattern("deepseek-r1", names::DEEPSEEK_V3)
.register_pattern("deepseek-v4", names::DEEPSEEK_V4)
.register_pattern("deepseek_v4", names::DEEPSEEK_V4)
.register_pattern("deepseek-v3.2", names::DEEPSEEK_V32)
.register_pattern("deepseek-v3.1", names::DEEPSEEK_V31)
.register_pattern("deepseek-v3", names::DEEPSEEK_V3)
.register_pattern("glm-5", names::GLM47)
.register_pattern("glm-4.7", names::GLM47)
.register_pattern("glm-4.6", names::GLM45)
.register_pattern("glm-4.5", names::GLM45)
.register_pattern("gemma4", names::GEMMA4)
.register_pattern("gemma-4", names::GEMMA4)
.register_pattern("kimi-k2", names::KIMI_K2)
.register_pattern("minimax", names::MINIMAX_M2)
.register_pattern("mm-m2", names::MINIMAX_M2);
factory
}
/// Register one parser type that exposes a static `create()` constructor.
pub fn register_parser<T>(&mut self, name: &str) -> &mut Self
where
T: ToolParser + 'static,
{
self.register_creator(name, T::create)
}
/// Construct a parser from an exact name.
pub fn create(&self, name: &str, tools: &[ChatTool]) -> crate::Result<Box<dyn ToolParser>> {
let creator = self.creator(name).ok_or_else(|| crate::Error::ParserUnavailableByName {
kind: "tool",
name: name.to_string(),
available_names: self.list(),
})?;
creator(tools).map_err(|error| crate::Error::ParserInitialization {
kind: "tool",
name: name.to_string(),
error: error.into(),
})
}
/// Resolve a parser from model ID and then construct it.
pub fn create_for_model(
&self,
model_id: &str,
tools: &[ChatTool],
) -> crate::Result<Box<dyn ToolParser>> {
let name = self.resolve_name_for_model(model_id).ok_or_else(|| {
crate::Error::ParserUnavailableForModel {
kind: "tool",
model_id: model_id.to_string(),
}
})?;
self.create(name, tools)
}
}
#[cfg(test)]
mod tests;
+152
View File
@@ -0,0 +1,152 @@
use vllm_tool_parser::Result;
use super::{ToolParseResult, ToolParser, ToolParserFactory, names};
use crate::Error;
use crate::request::ChatTool;
/// Test-only tool parser: construction always succeeds and every pushed chunk
/// produces a default (empty) parse result.
struct FakeToolParser;

impl ToolParser for FakeToolParser {
    /// Ignores the tool list and returns a boxed fake.
    fn create(_tools: &[ChatTool]) -> Result<Box<dyn ToolParser>>
    where
        Self: Sized + 'static,
    {
        let parser: Box<dyn ToolParser> = Box::new(FakeToolParser);
        Ok(parser)
    }

    fn preserve_special_tokens(&self) -> bool {
        true
    }

    /// Discards the chunk and reports nothing parsed.
    fn push(&mut self, _chunk: &str) -> Result<ToolParseResult> {
        let nothing_parsed = ToolParseResult::default();
        Ok(nothing_parsed)
    }
}
/// `Default` must give a registry with no parsers at all (unlike `new`).
#[test]
fn default_factory_starts_empty() {
    let registered = ToolParserFactory::default().list();
    assert!(registered.is_empty());
}
/// A parser registered by name must be visible through `contains` and `list`
/// and must be constructible through `create`.
#[test]
fn factory_contains_and_creates_registered_parsers() {
    const NAME: &str = "fake";

    let mut factory = ToolParserFactory::default();
    factory.register_parser::<FakeToolParser>(NAME);

    assert!(factory.contains(NAME));
    let listed = factory.list();
    assert!(listed.contains(&NAME.to_string()));

    factory.create(NAME, &[]).unwrap();
}
/// Creating a parser under an unregistered name must yield
/// `ParserUnavailableByName`.
#[test]
fn factory_rejects_unknown_parser_names() {
    let factory = ToolParserFactory::default();

    let Err(error) = factory.create("missing", &[]) else {
        panic!("expected parser lookup to fail");
    };

    assert!(matches!(error, Error::ParserUnavailableByName { .. }));
}
/// A model ID that matches no registered pattern must yield
/// `ParserUnavailableForModel`.
#[test]
fn factory_rejects_unknown_models() {
    let factory = ToolParserFactory::default();

    let Err(error) = factory.create_for_model("definitely-unknown-model", &[]) else {
        panic!("expected model lookup to fail");
    };

    assert!(matches!(error, Error::ParserUnavailableForModel { .. }));
}
/// A registered pattern must match as a substring of the model ID and lead to
/// the parser it names.
#[test]
fn factory_creates_registered_parser_for_model() {
    let mut factory = ToolParserFactory::default();
    factory.register_parser::<FakeToolParser>("fake");
    factory.register_pattern("fake-model", "fake");

    let created = factory.create_for_model("my-fake-model-v1", &[]);
    created.unwrap();
}
/// The built-in pattern table must map representative model IDs to the
/// expected parser names, including cases that rely on pattern ordering.
#[test]
fn factory_new_resolves_default_patterns() {
    let expectations = [
        ("Qwen/Qwen3.5-0.8B", names::QWEN3_CODER),
        ("Qwen/Qwen3-0.6B", names::QWEN3_XML),
        ("Qwen/Qwen3-Coder-30B", names::QWEN3_CODER),
        ("meta-llama-4-maverick", names::LLAMA4_JSON),
        ("meta-llama-3.2-3b-instruct", names::LLAMA3_JSON),
        ("meta-llama/Llama-3.1-8B-Instruct", names::LLAMA3_JSON),
        ("deepseek-ai/DeepSeek-V4", names::DEEPSEEK_V4),
        ("deepseek-ai/DeepSeek-V3.2-Exp", names::DEEPSEEK_V32),
        ("deepseek-ai/DeepSeek-V4-Chat", names::DEEPSEEK_V4),
        ("deepseek_v4", names::DEEPSEEK_V4),
        ("deepseek-ai/DeepSeek-R1-0528", names::DEEPSEEK_V3),
        ("deepseek-ai/DeepSeek-V3.1", names::DEEPSEEK_V31),
        ("zai-org/GLM-5-32B-Chat", names::GLM47),
        ("zai-org/GLM-5.1-32B-Instruct", names::GLM47),
        ("glm-4.7", names::GLM47),
        ("google/gemma-4-27b-it", names::GEMMA4),
        ("NousResearch/Hermes-3-Llama-3.1-8B", names::HERMES),
        ("MiniMax/MiniMax-M2-01", names::MINIMAX_M2),
        ("org/mm-m2-base", names::MINIMAX_M2),
    ];

    let factory = ToolParserFactory::new();
    for (model_id, expected) in expectations {
        assert_eq!(
            factory.resolve_name_for_model(model_id),
            Some(expected),
            "model id: {model_id}"
        );
    }
}
@@ -0,0 +1,555 @@
//! DeepSeek V3.2 prompt renderer.
use std::collections::{HashMap, HashSet};
use std::fmt::Write as _;
use serde::Serialize;
use serde_json::Value;
use serde_json_fmt::JsonFormat;
use crate::error::{Error, Result};
use crate::request::{ChatContent, ChatMessage, ChatRequest, ChatRole, ChatTool};
use crate::{AssistantContentBlock, AssistantMessageExt, AssistantToolCall};
// Special-token literals used by the DeepSeek V3.2 renderer.
//
// NOTE(review): these literals may have lost delimiter characters when this
// text was extracted; verify them against the model's tokenizer
// configuration before relying on the exact spelling.

/// Written as the very first piece of every rendered prompt.
const BOS_TOKEN: &str = "<begin▁of▁sentence>";
/// End-of-sentence marker (presumably closes assistant turns; its use is not
/// shown in this part of the file).
const EOS_TOKEN: &str = "<end▁of▁sentence>";
/// Opening tag of a reasoning ("thinking") section.
const THINKING_START_TOKEN: &str = "<think>";
/// Closing tag of a reasoning ("thinking") section.
const THINKING_END_TOKEN: &str = "</think>";
/// Marker used by the DSML markup (presumably for tool-call rendering; its use
/// is not shown in this part of the file).
const DSML_TOKEN: &str = "DSML";
/// DeepSeek uses `"chat"` vs `"thinking"` mode names. Keep the split explicit
/// here so the render branches stay easy to read.
///
/// `render_request` selects the mode from `request.enable_thinking()`, with an
/// unset value treated as `Chat`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ThinkingMode {
/// Thinking disabled (also the fallback when the request does not say).
Chat,
/// Thinking enabled.
Thinking,
}
/// Tool schema shape rendered inside the `<functions>` block.
///
/// All fields borrow from the tool definition being rendered. The
/// `skip_serializing_none` attribute is expected to omit `description` and
/// `strict` from the serialized output when they are `None` (see the
/// `serde_with` documentation).
#[serde_with::skip_serializing_none]
#[derive(Debug, Serialize)]
struct RenderedToolSchema<'a> {
/// Function name.
name: &'a str,
/// Optional human-readable description.
description: Option<&'a str>,
/// Parameter schema, serialized as the JSON value it already is.
parameters: &'a Value,
/// Optional strictness flag.
strict: Option<bool>,
}
/// Render one chat request into the final prompt string.
///
/// The prompt starts with the BOS token. When tool parsing is enabled, a
/// synthetic tool-only system turn is emitted next, and then every request
/// message is rendered in order. Any rendering error aborts the whole render.
pub(super) fn render_request(request: &ChatRequest) -> Result<String> {
    let thinking_mode = if request.enable_thinking()?.unwrap_or(false) {
        ThinkingMode::Thinking
    } else {
        ThinkingMode::Chat
    };

    let messages = request.messages.as_slice();

    // Historical reasoning is only dropped when the conversation ends on a
    // user-like turn.
    let drop_thinking = messages
        .last()
        .is_some_and(|last| matches!(last.role(), ChatRole::User | ChatRole::Developer));

    // One extra rendered turn precedes the real messages when tools are on.
    let render_offset = isize::from(request.tool_parsing_enabled());
    let last_user_render_index = find_last_user_render_index(messages, render_offset);
    let last_user_actual_index = find_last_user_actual_index(messages);

    let mut prompt = BOS_TOKEN.to_owned();
    if request.tool_parsing_enabled() {
        render_system_message(&mut prompt, None, &request.tools)?;
    }

    messages
        .iter()
        .enumerate()
        .try_for_each(|(message_index, message)| {
            render_message(
                &mut prompt,
                messages,
                message_index,
                message,
                render_offset,
                last_user_render_index,
                last_user_actual_index,
                thinking_mode,
                drop_thinking,
            )
        })?;

    Ok(prompt)
}
/// Locate the last user-like (user or developer) turn, expressed in render
/// order.
///
/// `render_offset` is `1` when a synthetic tool-only system turn is rendered
/// before the real request messages, and `0` otherwise. Returns `-1` when the
/// request has no user-like turn at all.
fn find_last_user_render_index(messages: &[ChatMessage], render_offset: isize) -> isize {
    let last_user_like = messages
        .iter()
        .rposition(|message| matches!(message.role(), ChatRole::User | ChatRole::Developer));

    match last_user_like {
        Some(index) => index as isize + render_offset,
        None => -1,
    }
}
/// Render one real request message into `out`, dispatching on its role.
///
/// `render_offset` accounts for the synthetic tool-only system turn that may
/// already have been emitted before the loop, so that this message's position
/// can be compared with `last_user_render_index`. That comparison, combined
/// with the thinking mode, decides which thinking-related flag each role's
/// renderer receives.
fn render_message(
    out: &mut String,
    messages: &[ChatMessage],
    message_index: usize,
    message: &ChatMessage,
    render_offset: isize,
    last_user_render_index: isize,
    last_user_actual_index: usize,
    thinking_mode: ThinkingMode,
    drop_thinking: bool,
) -> Result<()> {
    let thinking = thinking_mode == ThinkingMode::Thinking;
    let render_index = message_index as isize + render_offset;
    // Where this message sits relative to the last user-like turn.
    let position = render_index.cmp(&last_user_render_index);

    match message {
        ChatMessage::System { content } => render_system_message(out, Some(content), &[]),
        ChatMessage::Developer { content, tools } => {
            let local_tools = tools.as_deref().unwrap_or(&[]);
            render_developer_message(out, content, local_tools, thinking && position.is_eq())
        }
        ChatMessage::User { content } => {
            render_user_message(out, content, thinking && position.is_eq())
        }
        ChatMessage::Assistant { content } => {
            let keep_reasoning = should_keep_assistant_reasoning(
                message_index,
                last_user_actual_index,
                thinking_mode,
                drop_thinking,
            );
            render_assistant_message(
                out,
                thinking && position.is_gt(),
                content,
                keep_reasoning,
                // TODO: Respect `continue_final_message` and map it to DeepSeek's
                // prefix-style final-assistant continuation behavior.
                false,
            )
        }
        ChatMessage::ToolResponse { content, .. } => render_tool_message(
            out,
            messages,
            message_index,
            thinking && position.is_ge(),
            content,
        ),
    }
}
/// Decide whether an assistant message keeps its reasoning content.
///
/// Reasoning is dropped only when all three conditions hold: thinking mode is
/// on, the request ends on a new user-like message (`drop_thinking`), and the
/// assistant message precedes the last user-like turn.
fn should_keep_assistant_reasoning(
    actual_index: usize,
    last_user_actual_index: usize,
    thinking_mode: ThinkingMode,
    drop_thinking: bool,
) -> bool {
    if thinking_mode != ThinkingMode::Thinking || !drop_thinking {
        return true;
    }
    // Only turns strictly before the last user-like turn count as history.
    actual_index >= last_user_actual_index
}
/// Find the position of the last user or developer message in the real
/// request message list.
///
/// Returns `usize::MAX` when the list contains no such message.
fn find_last_user_actual_index(messages: &[ChatMessage]) -> usize {
    for (index, message) in messages.iter().enumerate().rev() {
        if matches!(message.role(), ChatRole::User | ChatRole::Developer) {
            return index;
        }
    }
    usize::MAX
}
/// Write a system turn: the optional `content` first, then, when `tools` is
/// non-empty, a blank line followed by the tool preamble.
fn render_system_message(
    out: &mut String,
    content: Option<&ChatContent>,
    tools: &[ChatTool],
) -> Result<()> {
    if let Some(system_text) = content {
        write_chat_content(out, system_text)?;
    }
    if tools.is_empty() {
        return Ok(());
    }
    out.push_str("\n\n");
    render_tools(out, tools)
}
/// Developer messages are wrapped into the same user-like turn shape as real
/// user messages, but can also carry message-local tools.
fn render_developer_message(
out: &mut String,
content: &ChatContent,
tools: &[ChatTool],
opens_thinking: bool,
) -> Result<()> {
if content.is_empty() {
return Err(Error::ChatTemplate(
"invalid DeepSeek V3.2 developer message: empty content".to_string(),
));
}
out.push_str("<User>");
if !tools.is_empty() {
out.push_str("\n\n");
render_tools(out, tools)?;
}
out.push_str("\n\n# The user's message is: ");
write_chat_content(out, content)?;
write_user_like_suffix(out, opens_thinking);
Ok(())
}
/// Render a plain user turn: the user marker, the message content, then the
/// shared user-like suffix (no developer preamble).
fn render_user_message(
    out: &mut String,
    content: &ChatContent,
    opens_thinking: bool,
) -> Result<()> {
    out.push_str("<User>");
    write_chat_content(out, content).map(|()| write_user_like_suffix(out, opens_thinking))
}
/// Close a user-like turn (real user or native developer) whose content has
/// already been written: emit the assistant marker followed by the
/// thinking-start token when `opens_thinking` is set, or the thinking-end
/// token otherwise.
// TODO: respect `add_generation_prompt` option
fn write_user_like_suffix(out: &mut String, opens_thinking: bool) {
    let thinking_token = if opens_thinking {
        THINKING_START_TOKEN
    } else {
        THINKING_END_TOKEN
    };
    out.push_str("<Assistant>");
    out.push_str(thinking_token);
}
/// Render the shared `<function_results>` block for a contiguous run of tool
/// responses.
///
/// All tool responses that directly follow one assistant tool-call message are
/// rendered together, exactly once: the call made for the first message of the
/// run emits the whole block, and the calls for later messages of the same run
/// are no-ops. Results are written in the order of the assistant's tool calls
/// (matched by `tool_call_id`), not in the order the tool messages appear.
///
/// `resumes_thinking` selects the token appended after the block: the
/// thinking-start token when `true`, the thinking-end token otherwise.
/// `_content` is unused; result contents are read from `messages`.
///
/// # Errors
///
/// Returns `Error::ChatTemplate` when:
/// - no assistant message precedes the run,
/// - the message directly before the run is not an assistant message,
/// - that assistant message has no tool calls or repeats a tool call id,
/// - a tool response carries an unknown or duplicated `tool_call_id`, or
/// - an assistant tool call has no matching tool response.
fn render_tool_message(
    out: &mut String,
    messages: &[ChatMessage],
    message_index: usize,
    resumes_thinking: bool,
    _content: &ChatContent,
) -> Result<()> {
    let (block_start, block_end) = tool_response_block_bounds(messages, message_index);
    if message_index != block_start {
        // Already rendered as part of the block opened by the first response.
        return Ok(());
    }
    let Some(prev_assistant_idx) = previous_assistant_actual_index(messages, block_start) else {
        return Err(Error::ChatTemplate(
            "invalid DeepSeek V3.2 tool message: missing previous assistant message".to_string(),
        ));
    };
    // `block_start` is the first tool message of the run, so the message right
    // before it is the previous non-tool message. It must be the assistant turn
    // that issued the calls: an assistant found further back means another turn
    // sits in between, and pairing the results with that stale assistant would
    // render a malformed prompt.
    let assistant_content = match &messages[prev_assistant_idx] {
        ChatMessage::Assistant { content } if prev_assistant_idx + 1 == block_start => content,
        _ => {
            return Err(Error::ChatTemplate(
                "invalid DeepSeek V3.2 tool message: previous non-tool message is not assistant"
                    .to_string(),
            ));
        }
    };
    let assistant_tool_calls = assistant_content.tool_calls().collect::<Vec<_>>();
    if assistant_tool_calls.is_empty() {
        return Err(Error::ChatTemplate(
            "invalid DeepSeek V3.2 tool message: previous assistant message has no tool calls"
                .to_string(),
        ));
    }

    // Collect the ids the assistant asked for, rejecting duplicates.
    let mut expected_tool_call_ids = HashSet::with_capacity(assistant_tool_calls.len());
    for tool_call in &assistant_tool_calls {
        if !expected_tool_call_ids.insert(tool_call.id.as_str()) {
            return Err(Error::ChatTemplate(
                "invalid DeepSeek V3.2 assistant tool calls: duplicate tool_call_id".to_string(),
            ));
        }
    }

    // Index the responses of this run by id, rejecting unknown or repeated ids.
    let mut tool_results_by_id = HashMap::with_capacity(assistant_tool_calls.len());
    for message in &messages[block_start..block_end] {
        let ChatMessage::ToolResponse {
            content,
            tool_call_id,
        } = message
        else {
            unreachable!("tool response block should only contain tool messages");
        };
        if !expected_tool_call_ids.contains(tool_call_id.as_str()) {
            return Err(Error::ChatTemplate(format!(
                "invalid DeepSeek V3.2 tool message: unknown tool_call_id `{tool_call_id}`"
            )));
        }
        if tool_results_by_id.insert(tool_call_id.as_str(), content).is_some() {
            return Err(Error::ChatTemplate(format!(
                "invalid DeepSeek V3.2 tool message: duplicate tool_call_id `{tool_call_id}`"
            )));
        }
    }
    // Every id is known and unique, so equal sizes mean every call has a result.
    if tool_results_by_id.len() != assistant_tool_calls.len() {
        return Err(Error::ChatTemplate(
            "invalid DeepSeek V3.2 tool messages: missing tool result for assistant tool call"
                .to_string(),
        ));
    }

    out.push_str("\n\n<function_results>");
    for tool_call in assistant_tool_calls {
        let content = tool_results_by_id
            .get(tool_call.id.as_str())
            .expect("validated tool_call_id set should be complete");
        out.push_str("\n<result>");
        write_chat_content(out, content)?;
        out.push_str("</result>");
    }
    out.push_str("\n</function_results>");
    out.push_str("\n\n");
    if resumes_thinking {
        out.push_str(THINKING_START_TOKEN);
    } else {
        out.push_str(THINKING_END_TOKEN);
    }
    Ok(())
}
/// Compute the half-open range `(start, end)` of the contiguous run of tool
/// responses around `actual_index`, by extending left and right from that
/// position while the neighbouring messages are tool responses.
fn tool_response_block_bounds(messages: &[ChatMessage], actual_index: usize) -> (usize, usize) {
    fn is_tool_response(message: &ChatMessage) -> bool {
        matches!(message, ChatMessage::ToolResponse { .. })
    }

    // Number of tool responses immediately before `actual_index`.
    let leading = messages[..actual_index]
        .iter()
        .rev()
        .take_while(|message| is_tool_response(message))
        .count();
    // Number of tool responses immediately after `actual_index`.
    let trailing = messages
        .iter()
        .skip(actual_index + 1)
        .take_while(|message| is_tool_response(message))
        .count();

    (actual_index - leading, actual_index + 1 + trailing)
}
/// Walk backwards from `actual_index` (exclusive) and return the index of the
/// first assistant message found, or `None` when there is none.
fn previous_assistant_actual_index(messages: &[ChatMessage], actual_index: usize) -> Option<usize> {
    (0..actual_index)
        .rev()
        .find(|&index| matches!(messages[index], ChatMessage::Assistant { .. }))
}
/// Render an assistant turn: optional reasoning, visible text, the tool-call
/// block when the turn has tool calls, and finally the EOS marker.
///
/// With `prefix` set and no tool calls, only the visible text is written (no
/// reasoning, no EOS marker). When `after_last_user_turn` is set, the turn must
/// carry kept reasoning or tool calls; otherwise `Error::ChatTemplate` is
/// returned.
fn render_assistant_message(
    out: &mut String,
    after_last_user_turn: bool,
    content: &[AssistantContentBlock],
    keep_reasoning: bool,
    prefix: bool,
) -> Result<()> {
    let has_tool_calls = content.has_tool_calls();
    if prefix && !has_tool_calls {
        write_assistant_text(out, content);
        return Ok(());
    }

    if after_last_user_turn {
        let has_reasoning = keep_reasoning && content.has_reasoning();
        match (has_reasoning, has_tool_calls) {
            (false, false) => {
                return Err(Error::ChatTemplate(
                    "invalid DeepSeek V3.2 assistant message after last user message: expected reasoning or tool calls"
                        .to_string(),
                ));
            }
            (true, _) => write_assistant_reasoning(out, content),
            (false, true) => {}
        }
        // The thinking block is closed whether or not reasoning was written.
        out.push_str(THINKING_END_TOKEN);
    }

    write_assistant_text(out, content);

    if has_tool_calls {
        out.push_str("\n\n<DSMLfunction_calls>\n");
        let mut first = true;
        for tool_call in content.tool_calls() {
            if !first {
                out.push('\n');
            }
            first = false;
            render_tool_call(out, tool_call)?;
        }
        out.push_str("\n</DSMLfunction_calls>");
    }

    out.push_str(EOS_TOKEN);
    Ok(())
}
/// Write one assistant tool call as a DSML `invoke` element: the opening tag
/// with the tool name, the encoded parameters, then the closing tag.
fn render_tool_call(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
    let name = &tool_call.name;
    writeln!(out, "<{DSML_TOKEN}invoke name=\"{name}\">").expect("writing to String cannot fail");

    encode_arguments_to_dsml(out, tool_call)?;

    write!(out, "\n</{DSML_TOKEN}invoke>").expect("writing to String cannot fail");
    Ok(())
}
/// Convert one assistant tool-call arguments object into DSML parameter form.
///
/// String values are emitted raw with `string="true"`, while all other JSON
/// values are rendered with JSON syntax and `string="false"`.
fn encode_arguments_to_dsml(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
let arguments: Value = serde_json::from_str(&tool_call.arguments).map_err(|error| {
Error::ChatTemplate(format!(
"assistant tool call has invalid JSON arguments for DeepSeek V3.2: {error}"
))
})?;
let Some(arguments) = arguments.as_object() else {
return Err(Error::ChatTemplate(
"assistant tool call arguments for DeepSeek V3.2 must be a JSON object".to_string(),
));
};
let mut wrote_parameter = false;
for (key, value) in arguments {
if wrote_parameter {
out.push('\n');
}
let is_string = matches!(value, Value::String(_));
write!(
out,
"<{DSML_TOKEN}parameter name=\"{key}\" string=\"{}\">",
if is_string { "true" } else { "false" }
)
.expect("writing to String cannot fail");
match value {
Value::String(value) => out.push_str(value),
value => out.push_str(&json_dumps(value)?),
}
write!(out, "</{DSML_TOKEN}parameter>").expect("writing to String cannot fail");
wrote_parameter = true;
}
Ok(())
}
/// Render the full tool preamble shown to the model.
///
/// Appends to `out`, in order: a fixed instruction header (how to write a
/// function-calls block, how parameters are encoded, and a thinking example),
/// the JSON schema of each entry in `tools` separated by single newlines, and
/// the closing `</functions>` tag followed by a newline.
///
/// Returns any error produced while rendering a tool schema.
fn render_tools(out: &mut String, tools: &[ChatTool]) -> Result<()> {
// The header is prompt text inside a raw string: every character, including
// each line break, is emitted exactly as written.
out.push_str(
r#"## Tools
You have access to a set of tools you can use to answer the user's question.
You can invoke functions by writing a "<DSMLfunction_calls>" block like the following as part of your reply to the user:
<DSMLfunction_calls>
<DSMLinvoke name="$FUNCTION_NAME">
<DSMLparameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</DSMLparameter>
...
</DSMLinvoke>
<DSMLinvoke name="$FUNCTION_NAME2">
...
</DSMLinvoke>
</DSMLfunction_calls>
String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
<DSMLfunction_calls>
...
</DSMLfunction_calls>
<function_results>
...
</function_results>
<think>...thinking about results</think>
Here are the functions available in JSONSchema format:
<functions>
"#,
);
// Schemas are joined by a single newline; none is written before the first.
for (index, tool) in tools.iter().enumerate() {
if index > 0 {
out.push('\n');
}
render_tool_schema(out, tool)?;
}
out.push_str("\n</functions>\n");
Ok(())
}
/// Append the JSON form of one tool to `out`, in the shape embedded inside
/// `<functions>`: its name, optional description, parameters and `strict`
/// value.
fn render_tool_schema(out: &mut String, tool: &ChatTool) -> Result<()> {
    let schema = RenderedToolSchema {
        name: &tool.name,
        description: tool.description.as_deref(),
        parameters: &tool.parameters,
        strict: tool.strict,
    };
    let rendered = json_dumps(&schema)?;
    out.push_str(&rendered);
    Ok(())
}
/// Append chat content straight to `out` without building an intermediate
/// `String`: plain text is copied as is, and multi-part content is written
/// part by part in order.
///
/// Returns the error from `as_text` for a part that cannot be read as text.
fn write_chat_content(out: &mut String, content: &ChatContent) -> Result<()> {
    match content {
        ChatContent::Parts(parts) => {
            for part in parts {
                let text = part.as_text()?;
                out.push_str(text);
            }
        }
        ChatContent::Text(text) => out.push_str(text),
    }
    Ok(())
}
/// Append the text of every reasoning block to `out`, preserving block order
/// and skipping all other block kinds.
fn write_assistant_reasoning(out: &mut String, content: &[AssistantContentBlock]) {
    content
        .iter()
        .filter_map(|block| match block {
            AssistantContentBlock::Reasoning { text } => Some(text),
            _ => None,
        })
        .for_each(|text| out.push_str(text));
}
/// Append the text of every visible text block to `out`, preserving block
/// order and skipping all other block kinds.
fn write_assistant_text(out: &mut String, content: &[AssistantContentBlock]) {
    content
        .iter()
        .filter_map(|block| match block {
            AssistantContentBlock::Text { text } => Some(text),
            _ => None,
        })
        .for_each(|text| out.push_str(text));
}
/// Compact JSON serialization used by this renderer for exact prompt text.
fn json_dumps<T: Serialize>(value: &T) -> Result<String> {
JsonFormat::new()
.comma(", ")
.expect("literal comma separator is valid JSON")
.colon(": ")
.expect("literal colon separator is valid JSON")
.ascii(false)
.format_to_string(value)
.map_err(|error| {
Error::ChatTemplate(format!(
"failed to serialize DeepSeek V3.2 JSON payload: {error}"
))
})
}
@@ -0,0 +1,149 @@
{
"tools": [
{
"type": "function",
"function": {
"name": "get_datetime",
"description": "Get the current date and time",
"parameters": {
"type": "object",
"properties": {
"timezone": {
"type": "string",
"description": "The timezone, e.g. Asia/Shanghai, UTC"
}
},
"required": ["timezone"]
}
}
},
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather for a specific date and location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name, e.g. Beijing, Hangzhou"
},
"date": {
"type": "string",
"description": "The date in YYYY-MM-DD format"
}
},
"required": ["location", "date"]
}
}
}
],
"messages": [
{
"role": "system",
"content": "You are a helpful Assistant."
},
{
"role": "user",
"content": "明天杭州和北京的天气怎么样?"
},
{
"role": "assistant",
"reasoning_content": "用户询问明天的天气,我需要先获取当前日期来计算明天的日期📅",
"tool_calls": [
{
"id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
"type": "function",
"function": {
"arguments": "{\"timezone\": \"Asia/Shanghai\"}",
"name": "get_datetime"
}
}
]
},
{
"tool_call_id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
"role": "tool",
"content": "{\"current_date\": \"2024-01-15\", \"current_time\": \"14:30:00\", \"timezone\": \"Asia/Shanghai\"}"
},
{
"role": "assistant",
"reasoning_content": "现在知道今天是2024-01-15,明天就是2024-01-16。接下来查询杭州和北京明天的天气🌤️",
"tool_calls": [
{
"id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
"type": "function",
"function": {
"arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\"}",
"name": "get_weather"
}
},
{
"id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
"type": "function",
"function": {
"arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\"}",
"name": "get_weather"
}
}
]
},
{
"tool_call_id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
"role": "tool",
"content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\", \"temperature_high\": \"12\", \"temperature_low\": \"5\", \"weather\": \"多云\", \"humidity\": \"65%\"}"
},
{
"tool_call_id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
"role": "tool",
"content": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\", \"temperature_high\": \"-2\", \"temperature_low\": \"-8\", \"weather\": \"晴\", \"humidity\": \"30%\"}"
},
{
"role": "assistant",
"reasoning_content": "已获取两个城市明天的天气信息,现在整理给用户✨",
"content": "根据查询结果,明天(2024年1月16日)的天气情况如下:\n\n**杭州**:\n- 天气:多云\n- 最高温度:12°C\n- 最低温度:5°C\n- 湿度:65%\n\n**北京**\n- 天气:晴\n- 最高温度:-2°C\n- 最低温度:-8°C\n- 湿度:30%\n\n杭州明天会比较温暖但有些多云,而北京会很冷但是晴天。建议在北京的朋友要注意保暖!"
},
{
"role": "user",
"content": "谢谢!那后天呢?"
},
{
"role": "assistant",
"reasoning_content": "用户现在问后天的天气,后天是2024-01-17,我可以直接查询(因为已知今天日期)🗓️",
"tool_calls": [
{
"id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
"type": "function",
"function": {
"arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\"}",
"name": "get_weather"
}
},
{
"id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
"type": "function",
"function": {
"arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\"}",
"name": "get_weather"
}
}
]
},
{
"tool_call_id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
"role": "tool",
"content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\", \"temperature_high\": \"15\", \"temperature_low\": \"8\", \"weather\": \"小雨\", \"humidity\": \"80%\"}"
},
{
"tool_call_id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
"role": "tool",
"content": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\", \"temperature_high\": \"0\", \"temperature_low\": \"-6\", \"weather\": \"多云\", \"humidity\": \"45%\"}"
},
{
"role": "assistant",
"reasoning_content": "获取到后天的天气数据,整理回复给用户📝",
"content": "后天(2024年1月17日)的天气情况:\n\n**杭州**:\n- 天气:小雨\n- 最高温度:15°C\n- 最低温度:8°C\n- 湿度:80%\n\n**北京**\n- 天气:多云\n- 最高温度:0°C\n- 最低温度:-6°C\n- 湿度:45%\n\n杭州后天会有小雨,温度略有回升,记得带伞。北京会稍微暖和一点,但依然很冷,请继续做好保暖措施。"
}
]
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,533 @@
{
"messages": [
{
"role": "developer",
"content": "In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.",
"tools": [
{
"type": "function",
"function": {
"name": "search",
"description": "Searches for information related to query and displays topn results.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query string"
},
"topn": {
"type": "integer",
"description": "Number of top results to display",
"default": 10
},
"source": {
"type": "string",
"description": "Source to search within",
"enum": [
"web",
"news"
],
"default": "web"
}
},
"required": [
"query"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
}
},
{
"type": "function",
"function": {
"name": "open",
"description": "Opens the link id from the page indicated by cursor starting at line number loc, showing num_lines lines. Valid link ids are displayed with the formatting: 【{id}†.*】. If cursor is not provided, the most recent page is implied. If id is a string, it is treated as a fully qualified URL associated with source. If loc is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available. Use this function without id to scroll to a new location of an opened page.",
"parameters": {
"type": "object",
"properties": {
"id": {
"description": "Link ID (number) or fully qualified URL (string)",
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
}
],
"default": -1
},
"cursor": {
"type": "integer",
"description": "Page cursor indicator",
"default": -1
},
"loc": {
"type": "integer",
"description": "Starting line number",
"default": -1
},
"num_lines": {
"type": "integer",
"description": "Number of lines to show",
"default": -1
},
"view_source": {
"type": "boolean",
"description": "Whether to view source",
"default": false
},
"source": {
"type": "string",
"description": "Source associated with the URL",
"enum": [
"web",
"news"
],
"default": "web"
}
},
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
}
},
{
"type": "function",
"function": {
"name": "find",
"description": "Finds exact matches of pattern in the current page, or the page given by cursor.",
"parameters": {
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "The exact pattern to search for"
},
"cursor": {
"type": "integer",
"description": "Page cursor to search in",
"default": -1
}
},
"required": [
"pattern"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
}
},
{
"type": "function",
"function": {
"name": "python",
"description": "Use this tool to execute Python code. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files). When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds.",
"parameters": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "The python code to execute in the jupyter environment."
}
},
"required": [
"code"
],
"additionalProperties": false,
"$schema": "http://json-schema.org/draft-07/schema#"
}
}
}
]
},
{
"role": "assistant",
"content": "",
"reasoning_content": "The user asks: \"In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.\"\n\nWe need to parse the puzzle.\n\nWe have clues about a press release about vaccine doses for a country, dated between Dec 1-15 inclusive, reporting as of 5th of that month total doses received 16,201,670 and administered 7,583,134. That is likely a COVID-19 vaccine update for some country. Which country? Might be a smaller country. Could be something like \"Uganda\", \"Zimbabwe\", \"Kenya\", \"Philippines\", etc. Let's search.\n\nSearch for \"16,201,670 vaccine doses\" \"7,583,134 administered\".",
"tool_calls": [
{
"id": "fixture-tool-call-1-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"\\\"16,201,670\\\" vaccine doses\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[0] \"16,201,670\" vaccine doses (Search_Results/\"16,201,670\" vaccine doses)\n**viewing lines [0 - 38] of 38**\n\nL0: \nL1: URL: Search_Results/\"16,201,670\" vaccine doses\nL2: # Search Results\nL3: \nL4: * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: \nL5: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had \nL6: received a total of 16,201,670 vaccines, with 7,583,134 administered. While \nL7: vaccine acceptance is ...\nL8: * 【1†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec \nL9: 1, 2021 — Total Doses Received 16,201,670. Table 10 gives the total vaccines \nL10: received since the start of Covid -19 vaccination exercise in the country.\nL11: * 【2†Output Result Page; publish_date: none†open.unicef.org】 ... 16,201,670 \nL12: doses of multiple vaccines nationwide and full vaccination of 15.5 per cent with\nL13: two doses of COVID-19 vaccine as of 31 December 2021.\nL14: * 【3†rebased GDP; publish_date: none†documents1.worldbank.org】 Dec 7, 2021 — \nL15: As of December 5, 2021,. Kenya had received a total of 16,201,670 vaccines, \nL16: with. 7,583,134 administered. Vaccine acceptance is reportedly high.\nL17: * 【4†Integrated Annual Report; publish_date: none†www.co-opbank.co.ke】 May 27,\nL18: 2022 — ... doses of Covid-19 vaccines and administered close to 17 million ... \nL19: 16,201,670, huku 7,583,134 zikiwa tayari zimedungwa watu. Bado kuna ...\nL20: * 【5†World Bank lifts Kenya's growth prospect to 5% in 2021; publish_date: \nL21: none†www.africa-press.net】 ... 16,201,670 vaccines, with 7,583,134 administered.\nL22: While vaccine acceptance is reportedly high there is still a long way to go \nL23: towards the government's ...\nL24: * 【6†2020 Annual Report United States; publish_date: none†www.hi-us.org】 \nL25: including medicine, vaccines, Covid-19 prevention kits, and kits of ... \nL26: 16,201,670. 9,630,584. Prepaid expenses. 56,707. 22,601. 
Total current assets.\nL27: * 【7†HOSPITAL CENTRAL DE LA DEFENSA; publish_date: \nL28: none†contrataciondelestado.es】 16201670 X. URPA-URPA. 0. HCD-000393/00. KONTRON.\nL29: 7000 ... LASER SHOT LBP-1120. CNEUR-C. NEUROFISIOLOGÍA. 0 ... VICTOREEN CT HEAD\nL30: DOSE PHANTON. 43650-014.\nL31: * 【8†body metal concentrations: Topics by ...; publish_date: \nL32: none†www.science.gov】 PMID:16201670. Design Characteristics and Tobacco Metal \nL33: Concentrations in Filtered Cigars. PubMed. Caruso, Rosalie V; O'Connor, Richard \nL34: J; Travers, Mark J ...\nL35: * 【9†Public Offices, Personal Demands: Capability in ...; publish_date: \nL36: none†dokumen.pub】 ... (16201670).9 The Institutiones Politicae consists of two \nL37: books. Book one deals with the origin, nature and goal of the commonwealth on \nL38: the one hand, and the ...",
"tool_call_id": "fixture-tool-call-1-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Result shows Kenya. So the press release is about Kenya's vaccine doses as of Dec 5, 2021. The press release likely by World Bank or something. The search result shows \"World Bank lifts Kenya's growth prospect...\" maybe a press release dated Dec 14, 2021. But the clue says \"between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered.\" That matches Kenya on Dec 5, 2021. So the year is 2021.\n\nNow the next clue: \"That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services.\"\n\nThis sounds like an environmental or conservation organization. \"Nearly 2.2 million members and supporters contributed over $49 million.\" That might be something like \"The Nature Conservancy\", \"World Wildlife Fund\", \"Sierra Club\", \"Environmental Defense Fund\", \"Natural Resources Defense Council\", etc. Let's search.\n\nSearch for \"nearly 2.2 million members and supporters contributed over $49 million\" \"12% of total revenue coming from contributed services\".",
"tool_calls": [
{
"id": "fixture-tool-call-3-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"\\\"nearly 2.2 million members\\\" \\\"contributed over $49 million\\\"\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[1] \"nearly 2.2 million members\" \"contributed over $49 million\" (Search_Results/\"nearly 2.2 million members\" \"contributed over $49 million\")\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/\"nearly 2.2 million members\" \"contributed over $49 million\"\nL2: # Search Results\nL3: \nL4: * 【0†The Global Good Fund, Microsoft and Goodwill join forces ...; \nL5: publish_date: none†globalgoodfund.org】 Sep 28, 2022 — ... more than 2.2 million \nL6: people. They additionally contribute close to $49 billion in local, state and \nL7: federal tax revenues. When COVID hit ...\nL8: * 【1†Almost 22 billion American tax dollars spent to wipe out a ...; \nL9: publish_date: none†www.facebook.com】 US military funding for Israel's war crimes\nL10: in Lebanon and Gaza has now cost US taxpayers over $22 billion. When millions \nL11: struggle to afford the ...\nL12: * 【2†Corporate America has largely abandoned its post-January ...; \nL13: publish_date: none†www.citizensforethics.org】 Jul 29, 2025 — Since the January 6\nL14: insurrection, over 2,000 corporate and industry group PACs have given over $174\nL15: million to members of the Sedition ...\nL16: * 【3†Audit shows millions in questionable taxpayer spending at ...; \nL17: publish_date: none†www.aol.com】 18 hours ago — ... nearly doubled from 1.3 \nL18: million to about 2.2 million. 
That is more than one in four Washington state \nL19: residents receiving Medicaid, and the ...\nL20: * 【4†Incarceration and Poverty in the United States - AAF; publish_date: \nL21: none†www.americanactionforum.org】 Jun 30, 2020 — The United States currently \nL22: incarcerates 2.2 million people, nearly half of whom are non-violent drug \nL23: offenders, accused people held pre ...\nL24: * 【5†How the NRA Spends Revenue (2023); publish_date: none†paddockpost.com】 \nL25: Apr 25, 2025 — In 2023, the NRA reported total revenue of $176 million (compared\nL26: to $211 million in 2022, $227 million in 2021, $282 million in 2020, $292 ...\nL27: * 【6†NCA Newsroom; publish_date: none†www.ncausa.org】 ... 2.2 million U.S. \nL28: jobs—operating in every U.S. state and territory and contributing nearly $350 \nL29: billion to the U.S. economy every year. For media inquiries ...\nL30: * 【7†The Early Childhood Educator Workforce; publish_date: \nL31: none†cscce.berkeley.edu】 About 2.2 million adults are paid to care for and \nL32: educate more than 9.7 million children between birth and age five in center-\nL33: based and home-based settings.\nL34: * 【8†Bringing Out the Worst in Congress: CRA By the Numbers ...; publish_date:\nL35: none†progressivereform.org】 Jul 14, 2025 — In fact, the top ten recipients (out\nL36: of 25) account for almost $7 million, showing how these contributions are \nL37: targeted and strategic. Real ...\nL38: * 【9†Club 100: Over 100 Americans gave more than $100000 to ...; publish_date:\nL39: none†publicintegrity.org】 Jan 7, 2022 — Through Internet investments during the\nL40: 1990s, he attracted 2.2 million online accounts valued at $174 million. Schwab \nL41: has been a vocal ...",
"tool_call_id": "fixture-tool-call-3-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Not that. Try search for \"2.2 million members and supporters contributed over $49 million\".",
"tool_calls": [
{
"id": "fixture-tool-call-5-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"\\\"2.2 million members\\\" \\\"contributed over $49\\\"\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[2] \"2.2 million members\" \"contributed over $49\" (Search_Results/\"2.2 million members\" \"contributed over $49\")\n**viewing lines [0 - 2] of 2**\n\nL0: \nL1: URL: Search_Results/\"2.2 million members\" \"contributed over $49\"\nL2: # Search Results",
"tool_call_id": "fixture-tool-call-5-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Maybe it's an annual report of a non-profit. Search for \"annual report 2.2 million members contributed $49 million\".",
"tool_calls": [
{
"id": "fixture-tool-call-7-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"\\\"2.2 million members\\\" \\\"annual report\\\" $49 million\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[3] \"2.2 million members\" \"annual report\" $49 million (Search_Results/\"2.2 million members\" \"annual report\" $49 million)\n**viewing lines [0 - 36] of 36**\n\nL0: \nL1: URL: Search_Results/\"2.2 million members\" \"annual report\" $49 million\nL2: # Search Results\nL3: \nL4: * 【0†20-F; publish_date: none†www.sec.gov】 ANNUAL REPORT PURSUANT TO SECTION \nL5: ... Our membership grew from 2.1 million members as at December 31, 2023 to 2.2 \nL6: million members as at December 31, 2024.\nL7: * 【1†Oportun Reports Fourth Quarter and Full Year 2023 ...; publish_date: \nL8: none†investor.oportun.com】 Mar 12, 2024 — Oportun (Nasdaq: OPRT) is a mission-\nL9: driven fintech that puts its 2.2 million members' financial goals within reach. \nL10: ... annual report on ...\nL11: * 【2†2 0 21; publish_date: none†www.annualreports.com】 ANNUAL REPORT. 2. 0. \nL12: 21. 2. 0. 21. Page 2. 2. DEFENDERS OF WILDLIFE. 2. 0. 21. 2. 0. 21 ... In 2021, \nL13: Defenders of Wildlife's nearly 2.2 million members and.\nL14: * 【3†Annual report and accounts 2020; publish_date: none†www.3i.com】 \nL15: Disclaimer. The Annual report and accounts have been prepared solely to provide \nL16: information to shareholders. ... 2.2 million members. In December 2019, we sold \nL17: ...\nL18: * 【4†united states securities and exchange commission; publish_date: \nL19: none†s21.q4cdn.com】 Dec 14, 2018 — ☒ ANNUAL REPORT ... approximately 5.7 million\nL20: Members, including approximately 2.2 million Members from the acquisition of \nL21: Best Doctors.\nL22: * 【5†securities and exchange commission; publish_date: none†www.sec.gov】 ... \nL23: Annual Report on Form 10-K filed with the SEC on March 10, 2020. The ... 2.2 \nL24: million members at June 30, 2020. The following table highlights the ...\nL25: * 【6†2022-23 Annual Comprehensive Financial Report - CalPERS; publish_date: \nL26: none†www.calpers.ca.gov】 Nov 17, 2023 — ... annual report is consistent with \nL27: these accounting principles ... 
2.2 million members and health benefit services \nL28: to over 1.5 million ...\nL29: * 【7†Medical Centers Report; publish_date: \nL30: none†regents.universityofcalifornia.edu】 Oct 8, 2019 — which will allow its 2.2 \nL31: million members to receive advanced and ... annual report of the University of \nL32: California. For the Medical ...\nL33: * 【8†Working TogeTher to make HealtH care better; publish_date: \nL34: none†www.unitedhealthgroup.com】 Feb 8, 2012 — ANNUAL REPORT PURSUANT TO SECTION \nL35: 13 OR 15(d) OF THE ... approximately 2.2 million members enrolled in its \nL36: Medicare. Advantage ...",
"tool_call_id": "fixture-tool-call-7-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Result 2 shows \"Defenders of Wildlife\". That might be the organization. Let's open.",
"tool_calls": [
{
"id": "fixture-tool-call-9-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 3, \"id\": 2}"
}
}
]
},
{
"role": "tool",
"content": "[4] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [0 - 53] of 261**\n\nL0: \nL1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-\nL2: wildlife_2021.pdf\nL3: ===== Page 1 ===== 2021 ANNUAL REPORT ===== Page 2 ===== # DEFENDERS OF WILDLIFE\nL4: made important progress for imperiled species and vital landscapes across the \nL5: United States in 2021. \\--- **LAWYERS** immediately **challenged** the premature\nL6: and reckless decision to strip **gray wolves** of federal **Endangered Species \nL7: Act (ESA)** protections. For many decades, Defenders has led the effort to \nL8: protect and restore the gray wolf, and we will continue to fight the \nL9: unscientific and hostile anti-wolf policies that impede conservation progress \nL10: and will carry on our unrelenting battle to restore federal protections for this\nL11: iconic keystone species. \\--- **LOBBYISTS** worked around the clock to keep \nL12: wildlife and climate priorities in the **Infrastructure Investment and Jobs \nL13: Act**. We also continue fighting to keep important wildlife and habitat funding \nL14: in relevant **appropriations bills**. \\--- 2 DEFENDERS OF WILDLIFE ===== Page 3 \nL15: ===== POLICY EXPERTS pushed forward on the urgent need for a National \nL16: Biodiversity Strategy (NBS), an all-of-government approach to address the \nL17: unprecedented loss of wildlife and habitat we are experiencing. We have coupled \nL18: this with our new campaign to expand the National Wildlife Refuge System to \nL19: preserve our nations only lands set aside for wildlife. By defending, funding \nL20: and expanding our national wildlife refuges, we will directly address \nL21: biodiversity loss and climate change while promoting increased equitable access \nL22: to nature. FIELD TEAMS were on the ground helping to recover imperiled species. 
\nL23: From panthers and sea turtles in Florida to wolves, bison and black-footed \nL24: ferrets in Montana, Defenders conservation experts were in the field saving \nL25: wildlife all over the country. CONSERVATION INNOVATION EXPERTS provided \nL26: comprehensive analyses to guide policy and inform conservation strategies to \nL27: reach the goal of protecting 30% of our terrestrial and marine systems by 2030 \nL28: (“30x30”). Defenders Center for Conservation Innovation (CCI) produced a report\nL29: which details actions we need to take to achieve 30x30 while protecting \nL30: biodiversity and addressing the climate crisis. DEFENDERS.ORG ===== Page 4 =====\nL31: WE HAVE ACCOMPLISHED MUCH THIS YEAR WORKING WITH AN ADMINISTRATION THAT VALUES \nL32: SCIENCE AND CONSERVATION. Our many successes include the return of protections \nL33: to the Tongass National Forest in Alaska and the suspension of oil and gas \nL34: leasing permits for the Arctic National Wildlife Refuge. Defenders also played a\nL35: lead role in successfully defending the Migratory Bird Protection Act from \nL36: attack and securing critical protections for migratory birds. Throughout 2021 \nL37: Defenders made important progress for the wildlife and wild places we all love, \nL38: yet our nations wildlife still face unprecedented challenges. More and more \nL39: species, including manatees, Mexican gray wolves, polar bears and North Atlantic\nL40: right whales, face the very real threat of extinction because of climate \nL41: change, habitat loss, pollution and inadequate protections. In our work we \nL42: continue to demonstrate success by following the science, building partnerships,\nL43: leading with innovative solutions and focusing on collaborative and practical \nL44: approaches in defense of nature. Together we can make a lasting difference for \nL45: wildlife and habitats. Your support is essential to our work and we couldnt do \nL46: it without you. 
Thank you for being an important part of our efforts to protect \nL47: imperiled wildlife across our nation. Jamie Rappaport Clark President and CEO \nL48: John Dayton Board Chair OUR NEW TELEVISION SERIES, WILDLIFE NATION WITH JEFF \nL49: CORWIN, premiered in October and airs every weekend on ABC stations nationwide. \nL50: This groundbreaking series takes Jeff across the country to meet Defenders \nL51: experts along with our science, tribal and community partners to educate and \nL52: inspire viewers to join with Defenders to protect wildlife. While highlighting \nL53: some of our important conservation work and raising awareness of important ",
"tool_call_id": "fixture-tool-call-9-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Search within PDF for \"2.2 million members\".",
"tool_calls": [
{
"id": "fixture-tool-call-11-0",
"type": "function",
"function": {
"name": "find",
"arguments": "{\"cursor\": 4, \"pattern\": \"2.2\"}"
}
}
]
},
{
"role": "tool",
"content": "[5] Find results for text: `2.2` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=2.2)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L187】\nL1: 2021, Defenders of Wildlifes nearly 2.2 million members and supporters \nL2: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL3: includes all those who generously donated their time and expertise. The \nL4: steadfast support of our donors allows Defenders to sustain our program and ",
"tool_call_id": "fixture-tool-call-11-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open around L180.",
"tool_calls": [
{
"id": "fixture-tool-call-13-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 4, \"loc\": 170}"
}
}
]
},
{
"role": "tool",
"content": "[6] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [170 - 215] of 261**\n\nL170: into the wild. **10 DEFENDERS OF WILDLIFE** ===== Page 11 ===== AFTER ANOTHER \nL171: SUCCESSFUL DEFENDERS LAWSUIT ON BEHALF OF THE CRITICALLY ENDANGERED RED WOLF, \nL172: FWS reversed its decision to limit the recovery area and committed to a robust \nL173: release strategy. After years of inaction, FWS released eight wolves to the wild\nL174: in North Carolina and plan to release nine more wolves in the spring of 2022. \nL175: This is an incredible win for this critically endangered species whose \nL176: population has dwindled down to single digits in the wild because of \nL177: mismanagement, vehicle strikes and poaching. DEFENDERS CONTINUED TO LEAD EFFORTS\nL178: TO PROTECT THE FLORIDA MANATEE, a beloved species that suffered the deadliest \nL179: year on record in 2021, tragically surpassing 1,000 deaths because of water \nL180: pollution and lack of warm water habitat. Defenders led advocacy and education \nL181: aimed at restoring the natural flow of the dammed Ocklawaha River, which would \nL182: provide critical warm-water habitat that manatees need to survive. Defenders \nL183: legal team continued to fight for manatees in the courts, holding government \nL184: agencies accountable for protecting critical habitat and addressing the \nL185: devastating water pollution that is killing the seagrass and causing manatees to\nL186: starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In \nL187: 2021, Defenders of Wildlifes nearly 2.2 million members and supporters \nL188: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL189: includes all those who generously donated their time and expertise. 
The \nL190: steadfast support of our donors allows Defenders to sustain our program and \nL191: public education efforts in the field, the courts and on Capitol Hill. 2021 \nL192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split \nL193: interests $7,692 Income from investments, annuity reserve funds and trusts \nL194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 \nL198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF \nL199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, \nL200: trusts and split interests 15% Royalties and other income 7% Contributed \nL201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====\nL202: DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl \nL203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*\nL204: Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. \nL205: Oregon Dinah Bear, Esq. Arizona Kim OKeefe Beck, Ph.D California Gabriela \nL206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia \nL207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. \nL208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* \nL209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. 
New\nL210: York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS \nL211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, \nL212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D\nL213: Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State \nL214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D \nL215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae ",
"tool_call_id": "fixture-tool-call-13-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Scrolling further for \"Contributed services 12%\". Already seen. So the annual report is Defenders of Wildlife 2021 annual report.\n\nNext clue: \"The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%.\"\n\nThus, the following year (2022) annual report of Defenders of Wildlife includes mention of rescinding plan to open most of 22 million acres of NPR-A to oil and gas drilling. Also contributed services decreased to 10% (from 12% in 2021). So we need to find the 2022 annual report for Defenders of Wildlife.\n\nSearch for \"Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska\".",
"tool_calls": [
{
"id": "fixture-tool-call-15-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[7] Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska (Search_Results/Defenders of Wildlife 2022 annual report National Petroleum Reserve Alaska)\n**viewing lines [0 - 37] of 37**\n\nL0: \nL1: URL: Search_Results/Defenders of Wildlife 2022 annual report National Petroleum \nL2: Reserve Alaska\nL3: # Search Results\nL4: \nL5: * 【0†CELEBRATING YEARS; publish_date: none†www.annualreports.com】 With less \nL6: than 340 right whales left in the wild, Defenders is fighting tirelessly to end \nL7: deadly lobster gear entanglements and vessel strikes that are driving ...\nL8: * 【1†Financials; publish_date: none†defenders.org】 We invite you to explore \nL9: the reports below to learn more about our activities and accomplishments, and \nL10: how we put your money to work for wildlife.\nL11: * 【2†Alaska Program Looks Back on 2022; publish_date: none†defenders.org】 Feb \nL12: 9, 2023 — Thanks to a lawsuit joined by Defenders, seven million acres were \nL13: returned to protection within the National Petroleum Reserve-Alaska (NPR-A), ...\nL14: * 【3†Defenders-of-Wildlife-2022-Financial-Statement. ...; publish_date: \nL15: none†defenders.org】 We have audited the accompanying consolidated financial \nL16: statements of Defenders of Wildlife and Affiliated Defenders of Wildlife Action \nL17: Fund (collectively, ...\nL18: * 【4†2022 Annual Report; publish_date: none†alaskaconservation.org】 Jun 13, \nL19: 2023 — In 2022, we focused on three landscapes: the Arctic. National Wildlife \nL20: Refuge, Bristol Bay, and the Tongass National Forest. In March 2022,.\nL21: * 【5†Assessment of ecological and cultural values within the ...; \nL22: publish_date: none†www.blm.gov】 This document was written to provide technical \nL23: information regarding the ecological importance of the National Petroleum \nL24: Reserve Alaska (NPR-A). Several ...\nL25: * 【6†Accomplishments Report; publish_date: none†defenders.org】 National \nL26: Petroleum Reserve-Alaska. 
Identified for exceptional wildlife and cultural \nL27: values, including critical habitat for polar bears and other species ...\nL28: * 【7†2022 annual report; publish_date: none†dory-\nL29: plantain-s2zc.squarespace.com】 These projects are made possible through \nL30: collaborations with Defenders of. Wildlife, Cook Inletkeeper, Trustees for \nL31: Alaska,. Environmental Investigation Agency, ...\nL32: * 【8†23IMPACT REPORT; publish_date: none†www.annualreports.com】 Defenders of \nL33: Wildlife made incredible progress protecting wildlife and wild places in 2023, \nL34: helping shape a brighter future for imperiled species and vital ...\nL35: * 【9†From Leasing to Land Protections; publish_date: none†defenders-cci.org】 \nL36: Abstract. When you hear the name, National Petroleum Reserve - Alaska (NPR-A, or\nL37: Reserve), you might think of a barren land filled with oil wells and ...",
"tool_call_id": "fixture-tool-call-15-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open result 0.",
"tool_calls": [
{
"id": "fixture-tool-call-17-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 7, \"id\": 0}"
}
}
]
},
{
"role": "tool",
"content": "[8] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [0 - 53] of 289**\n\nL0: \nL1: URL: https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-\nL2: wildlife_2022.pdf\nL3: ===== Page 1 ===== 2022 Impact Report C E L E B R A T I N G Y E A R S ===== Page\nL4: 2 ===== 2022 Defenders of Wildlife made important progress for imperiled \nL5: species and vital landscapes across the United States in 2022. GRAY WOLF | JIM \nL6: GUMMERAL MAY STOCK PRIOR Lawyers successfully challenged the previous \nL7: administrations decision to delist the gray wolf and restored critical federal \nL8: protections under the Endangered Species Act. This latest triumph in court is \nL9: part of our ongoing battle to protect and restore gray wolves throughout their \nL10: historical range and shield them from persecution by extremist legislators in \nL11: Idaho, Montana and Wyoming. TWO MORE FATALIZED GRAY SWALLETS TO SEA TO SHARE \nL12: ALLIANCE Lobbyists worked around the clock to expand funding for wildlife \nL13: conservation in the FY2022 federal spending bill, which included $31 million (a \nL14: 44% increase) for the Bureau of Land Managements Threatened and Endangered \nL15: Species Program, $2.5 million (an 81% increase) for the U.S. Department of \nL16: Agriculture Wildlife Services Nonlethal Initiative to prevent human-wildlife \nL17: conflicts and $21 million (a 320% increase) for North Atlantic right whale \nL18: conservation. 2 DEFENDERS OF WILDLIFE ===== Page 3 ===== **Policy Experts** \nL19: played a crucial role in securing international trade protections for 100 \nL20: species of sharks and rays, all 158 species of glass frogs and 73 species of \nL21: reptiles, including 21 species of desert horned lizards, at the Convention on \nL22: International Trade in Endangered Species (CITES) in Panama. 
\\--- **Field \nL23: Teams** worked tirelessly to protect and restore imperiled species across the \nL24: country. From Florida manatees and red wolves in the Southeast to belugas and \nL25: grizzly bears in Alaska, Defenders conservation experts were on the ground \nL26: saving species that need our help to survive and thrive. \\--- **Conservation \nL27: Innovation Experts** published more than 10 peer-reviewed studies on topics that\nL28: include the Cook Inlet beluga whale, golden-cheeked warbler, global parrot \nL29: biodiversity, the Endangered Species Act, the effects of mountaintop removal \nL30: mining on endangered species, the ecological importance of panthers and the \nL31: implementation of “30x30” the globally recognized goal to which President \nL32: Biden committed the U.S. to conserve 30% of our imperiled lands and waters by \nL33: 2030. \\--- **DEFENDERS.ORG** ===== Page 4 ===== THANK YOU Defenders celebrated \nL34: our 75th anniversary in 2022—an exciting milestone that we attribute to our \nL35: unwavering dedication to our wildlife conservation mission. From helping to pass\nL36: the Endangered Species Act and other bedrock conservation laws to leading the \nL37: advocacy on the reintroduction of wolves to Yellowstone National Park and other \nL38: parts of the West, Defenders is proud to inspire, lead and ultimately celebrate \nL39: some of the most consequential conservation wins for wildlife and wild places. \nL40: It is wonderfully fitting that we also mark 2022 as a year when we successfully \nL41: defended, yet again, the original focal species and symbol of our organization, \nL42: the gray wolf. While the future for gray wolves and many other species remains \nL43: uncertain, we will never stop fighting to protect the imperiled wildlife that \nL44: need us and to restore vibrant and balanced ecosystems. Our mission is more \nL45: important than ever due to the escalating biodiversity and climate crises. 
\nL46: Defenders is determined to build on our accomplishments and seize new \nL47: opportunities to address these challenges through our multifaceted approach to \nL48: conservation. We focus on developing innovative and science-based solutions, \nL49: coordinating on-the-ground species and habitat restoration, promoting human-\nL50: wildlife coexistence, protecting and strengthening our bedrock environmental \nL51: laws, investing in impactful collaborations and partnerships, building a more \nL52: diverse and powerful conservation movement and challenging attacks on wildlife \nL53: through advocacy, policy and litigation. Defenders continued success is made ",
"tool_call_id": "fixture-tool-call-17-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Search within PDF for \"National Petroleum Reserve\".",
"tool_calls": [
{
"id": "fixture-tool-call-19-0",
"type": "function",
"function": {
"name": "find",
"arguments": "{\"cursor\": 8, \"pattern\": \"Petroleum\"}"
}
}
]
},
{
"role": "tool",
"content": "[9] Find results for text: `petroleum` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=petroleum)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L201】\nL1: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas \nL2: drilling and removed protections for important wildlife habitat. Protections \nL3: have now been restored for nearly half of the reserves pristine lands, which \nL4: are vital habitat for shorebirds, denning polar bears and tens of thousands of ",
"tool_call_id": "fixture-tool-call-19-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open around L190.",
"tool_calls": [
{
"id": "fixture-tool-call-21-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 8, \"loc\": 180}"
}
}
]
},
{
"role": "tool",
"content": "[10] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [180 - 227] of 289**\n\nL180: the sixth successful transfer of bison to the Assiniboine and Sioux Tribes of \nL181: Fort Peck since 2019. \\--- **SWIFT FIX KITS | © ROSIMA PAELARINTSKIMMA MADDIAL \nL182: 200 AND CONSERVATION BIOLOGY INSTITUTE** \\--- **Celebrating our third year** of \nL183: a collaborative program with the Aaniih and Nakoda Tribes and others to restore \nL184: swift foxes to the Fort Belknap Indian Reservation in Montana, Defenders helped \nL185: with the release of 28 more swift foxes. With over 100 foxes reintroduced \nL186: through this program, monitoring efforts show that they are reproducing in the \nL187: wild—a critical measure of success for a self-sustaining population. \\--- \nL188: **Defenders continued to lead the way** for conserving and recovering the \nL189: endangered black-footed ferret, supporting the black-footed ferret survey for \nL190: the Fort Belknap Indian community. Thirty-six ferrets were vaccinated against \nL191: sylvatic plague and two dozen kits were released in the wild. \\--- **10 \nL192: DEFENDERS OF WILDLIFE** ===== Page 11 ===== Defenders helped to bring hope for \nL193: recovery for the endangered military macaw, adding 11 fledglings to a growing \nL194: wild population in Puerta Vallarta, Mexico, that is under pressure from habitat \nL195: loss and poachers for the illegal pet trade. Accord- ing to our recent report, \nL196: the 2008 parrot trade ban that Defenders fought to achieve is working. \nL197: Preventing more than 30,000 parrots from being illegally trapped each year, the \nL198: trade ban has resulted in a 47% decrease in the illegal trade of parrots and an \nL199: 88% decrease in U.S. seizures of Mexican parrots. 
As a result of a Defenders \nL200: lawsuit, BLM rescinded the previous administrations plan that opened most of \nL201: the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas \nL202: drilling and removed protections for important wildlife habitat. Protections \nL203: have now been restored for nearly half of the reserves pristine lands, which \nL204: are vital habitat for shorebirds, denning polar bears and tens of thousands of \nL205: caribou in the Teshekpuk caribou herd. Our new storymap illustrates why the \nL206: reserve is so important to wildlife. Defenders marked the 20th anniversary of \nL207: our Sea Otter Awareness Week. In celebration of this milestone, we sponsored \nL208: state legislation, signed by Gov. Gavin Newsom, that formally recognizes the \nL209: events anniversary and acknowledges the vital role that sea otters play in \nL210: Californias nearshore ecosystems. This annual celebration provides a unique \nL211: opportunity to raise awareness of the important role sea otters play in \nL212: maintaining ecosystem health, our need to coexist with sea otters and the \nL213: efforts of Defenders and others in advancing sea otter conservation. \nL214: DEFENDERS.ORG ===== Page 12 ===== FINANCIAL REPORT In 2022, Defenders of \nL215: Wildlifes nearly 2.2 million members and supporters contributed more than $41 \nL216: million for wildlife and wildlife habitat. This figure includes all those who \nL217: generously donated their time and expertise. The steadfast support of our donors\nL218: allows Defenders to sustain our program and public education efforts in the \nL219: field, the courts and on Capitol Hill. 
2022 SOURCES OF FUNDS Grants and \nL220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from \nL221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other \nL222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL225: (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL226: Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%\nL227: Contributed services 10% Royalties and other income 2% Bequests, trusts and ",
"tool_call_id": "fixture-tool-call-21-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Scrolling a bit more for \"Contributed services 10%\". Already seen. So indeed the 2022 annual report shows contributed services 10%, down from 12% in 2021.\n\nThus the organization is Defenders of Wildlife.\n\nThe question: \"Who was the emeritus board member that year? Name only, no titles.\"\n\nWe need to identify the emeritus board member listed in the 2022 annual report. From the 2021 annual report, we saw \"BOARD EMERITUS Terry C. Pelster, Esq.\" However, that is for 2021. The question asks \"Who was the emeritus board member that year?\" That year refers to the following year (2022). So we need to check the 2022 annual report for the emeritus board member.\n\nOpen the 2022 PDF near the board list. Search for \"Emeritus\".",
"tool_calls": [
{
"id": "fixture-tool-call-23-0",
"type": "function",
"function": {
"name": "find",
"arguments": "{\"cursor\": 8, \"pattern\": \"Emeritus\"}"
}
}
]
},
{
"role": "tool",
"content": "[11] Find results for text: `emeritus` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=emeritus)\n**viewing lines [0 - 10] of 10**\n\nL0: # 【0†match at L237】\nL1: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. New \nL2: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela \nL3: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona\nL4: State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad\nL5: \nL6: # 【1†match at L243】\nL7: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David \nL8: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic \nL9: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of \nL10: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice ",
"tool_call_id": "fixture-tool-call-23-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open around L230.",
"tool_calls": [
{
"id": "fixture-tool-call-25-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 8, \"loc\": 220}"
}
}
]
},
{
"role": "tool",
"content": "[12] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf)\n**viewing lines [220 - 267] of 289**\n\nL220: contributions $28,946 Bequests, trusts and split interests $13,928 Income from \nL221: investments, annuity reserve funds and trusts (-$7,495) Royalties and other \nL222: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL223: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL224: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL225: (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL226: Dollars are in thousands. 12 DEFENDERS OF WILDLIFE Grants and contributions 59%\nL227: Contributed services 10% Royalties and other income 2% Bequests, trusts and \nL228: split interests 29% TOTAL REVENUE Income from investments -15% Program and \nL229: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== \nL230: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair\nL231: Texas Sheryl Sachs* Vice Chair Connecticut Dinah Bear, Esq.* Secretary Arizona \nL232: Mark Caylor* Treasurer District of Columbia Susan Alexander Connecticut Sylvia \nL233: Baca* California Whitefield Bailey Tennessee Donald Barry, Esq. Oregon Kim \nL234: OKeefe Beck, Ph.D California Gabriela Chavarria, Ph.D Washington Jamie \nL235: Rappaport Clark* President & CEO Virginia John Donovan, Esq.* Montana and \nL236: California Caroline D. Gabel Maryland Terry L. Root, Ph.D New Mexico Thomas D. \nL237: Sisk, Ph.D British Columbia, Canada BOARD EMERITUS Terry C. Pelster, Esq. 
New \nL238: York *Executive Committee member* STAFF & ADVISORS SCIENCE ADVISORS Gabriela \nL239: Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, Ph.D Arizona\nL240: State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D Universidad\nL241: Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State University Terry \nL242: L. Root, Ph.D Stanford University Mark Schwartz, Ph.D University of California-\nL243: Davis Thomas D. Sisk, Ph.D Emeritus Professor Northern Arizona University David \nL244: S. Wilcox, Ph.D Princeton University Rae Wynn-Grant, Ph.D National Geographic \nL245: Society SENIOR STAFF Jamie Rappaport Clark President & CEO Thu Pham Chief of \nL246: Staff James Stofan Senior Vice President, Operations McCrystle Adams Vice \nL247: President, Conservation Law Matt Braughler Vice President, Philanthropy Rachel \nL248: Brittin Vice President, Communications Shawn Cantrell Vice President, Field \nL249: Conservation Programs Robert L. Dewey Vice President, Government Relations & \nL250: External Affairs Monica Goldberg Vice President, Landscape Conservation Sharon \nL251: Morris-Smith Vice President of Human Resources Elizabeth Nichols Vice President,\nL252: Finance/CFO Anthony Pegues Vice President, Diversity, Equity, Inclusion, \nL253: Justice Lindsay Rosa, Ph.D Vice President, Conservation Research and Innovation \nL254: Brandace Swimley Vice President, Integrated Marketing 14 DEFENDERS OF WILDLIFE \nL255: ===== Page 15 ===== There are many ways to support Defenders of Wildlife and \nL256: protect our imperiled wildlife. To learn more or donate, contact us at \nL257: 1-800-385-9712, visit Defenders.org/donate or choose one of the following \nL258: options. ➤ Transfer appreciated assets directly to Defenders and maximize \nL259: potential tax benefits. 
Defenders.org/ donate-stocks-help-save-wildlife ➤ Become\nL260: a Leadership Council member with a single donation of $1,000 or more and enjoy \nL261: more personal involvement with Defenders. Defenders.org/leadershipcouncil ➤ If \nL262: you are 70.5 or older, roll over a portion of your IRA to benefit wildlife. \nL263: Remember, these gifts must come directly from your IRA account. \nL264: Defenders.org/planned-giving/gifts-your-retirement-plan ➤ Get started on a \nL265: contribution from your donor-advised fund Defenders.org/donor-advised-funds ➤ \nL266: Check with your personnel office or that of your spouse/partner about your \nL267: companys Corporate Matching Gifts program or visit us at ",
"tool_call_id": "fixture-tool-call-25-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Thus the emeritus board member is Terry C. Pelster. However, the question might ask for \"Who was the emeritus board member that year?\" Possibly referring to the year of the annual report that mentioned the rescinded plan and contributed services decrease. That is 2022. So the answer is Terry C. Pelster.\n\nBut let's verify that the 2022 annual report indeed includes the mention of contributed services decreasing to 10%. We saw that. So answer is Terry C. Pelster.\n\nBut maybe the question is tricky: \"Who was the emeritus board member that year?\" Could be the emeritus board member listed in the 2021 annual report? However, the question says \"That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%.\"\n\nThus the \"following year\" annual report is 2022. The question likely asks: \"Who was the emeritus board member that year?\" Meaning the emeritus board member listed in that annual report (2022). So answer: Terry C. Pelster.\n\nBut let's confirm that the 2021 annual report also shows emeritus board member Terry C. Pelster. Indeed we saw that. But perhaps the 2022 report might have a different emeritus board member. We saw it's the same. So answer is Terry C. Pelster.\n\nHowever, the question might be about a different organization. Let's verify the details.\n\nFirst clue: Press release about Kenya vaccine doses on Dec 5, 2021. That is likely a World Bank press release. Search for \"World Bank Kenya vaccine doses December 2021 press release\".",
"tool_calls": [
{
"id": "fixture-tool-call-27-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[13] As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines (Search_Results/As of December 5, 2021, Kenya had received a total of 16,201,670 vaccines)\n**viewing lines [0 - 41] of 41**\n\nL0: \nL1: URL: Search_Results/As of December 5, 2021, Kenya had received a total of \nL2: 16,201,670 vaccines\nL3: # Search Results\nL4: \nL5: * 【0†Kenya's Economy is Showing Resilience as Output Rises ...; publish_date: \nL6: none†www.worldbank.org】 Dec 14, 2021 — As of December 5, 2021, Kenya had \nL7: received a total of 16,201,670 vaccines, with 7,583,134 administered. While \nL8: vaccine acceptance is ...\nL9: * 【1†Unmet need for COVID-19 vaccination coverage in Kenya - PMC; \nL10: publish_date: none†pmc.ncbi.nlm.nih.gov】 by SK Muchiri · 2022 · Cited by 42 — As\nL11: of December 2021, six counties had a vaccination coverage of less than 5%. \nL12: These counties include Garissa, Mandera, Marsabit, Tana River, Turkana, and ...\nL13: * 【2†MINISTRY OF HEALTH; publish_date: none†covidhub.mediacouncil.or.ke】 Dec \nL14: 1, 2021 — • Total Covid-19 Vaccines Received to date- 16,201,670 ... 
Table 10: \nL15: Vaccine Logistics Received in the Country as at 5th, December 2021.\nL16: * 【3†COVID-19 vaccination refusal trends in Kenya over 2021 - PMC; \nL17: publish_date: none†pmc.ncbi.nlm.nih.gov】 by RT Rego · 2023 · Cited by 21 — We \nL18: assessed vaccine refusal over time in Kenya, and characterized factors \nL19: associated with changes in vaccination refusal.\nL20: * 【4†Ciheb-Kenya on the Front Lines of the COVID-19 Vaccine ...; publish_date:\nL21: none†www.ciheb.org】 Of the 721,509 persons vaccinated, 409,711 are aged 58 \nL22: years and above, 143,050 are healthcare workers, 109,593 are teachers, and \nL23: 59,155 are security officers.\nL24: * 【5†proportion of fully vaccinated adults 23.3pc.; publish_date: \nL25: none†www.facebook.com】 COVID-19: KENYA administers 150632 new vaccinations; \nL26: total jabs given now 13782963; proportion of fully vaccinated adults 23.3pc.\nL27: * 【6†National COVID-19 Vaccines Deployment and ...; publish_date: \nL28: none†www.parliament.go.ke】 Kenya has a well-established immunization program and\nL29: a robust disease surveillance and response unit (DSRU) which serves as an early\nL30: warning system, to.\nL31: * 【7†Kenya: Vaccine Requirements Violate Rights; publish_date: \nL32: none†www.hrw.org】 Dec 13, 2021 — Kenya, with an estimated adult population of \nL33: 27.2 million and a total population of 55 million, has received approximately 23\nL34: million doses as ...\nL35: * 【8†Government gives 20 million a month to get vaccinated; publish_date: \nL36: none†www.bbc.com】 Nov 22, 2021 — Although less than 10% of Kenyans are \nL37: vaccinated, the government wants to avoid a surge over Christmas.\nL38: * 【9†ministry of health kenya covid-19 immunization status ...; publish_date: \nL39: none†www.health.go.ke】 May 15, 2022 — By the end of 2021 Kenya had administered \nL40: 10.1 million doses of 5 types of Covid-19 vaccines. During this period 4.2 \nL41: million people were ...",
"tool_call_id": "fixture-tool-call-27-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open result 0.",
"tool_calls": [
{
"id": "fixture-tool-call-29-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 13, \"id\": 0}"
}
}
]
},
{
"role": "tool",
"content": "[14] www.worldbank.org (https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-rebound-in-the-services-sect)\n**viewing lines [0 - 52] of 61**\n\nL0: \nL1: URL: https://www.worldbank.org/en/news/press-release/2021/12/14/kenya-s-economy-\nL2: is-showing-resilience-as-output-rises-above-pre-pandemic-levels-driven-by-a-\nL3: rebound-in-the-services-sect\nL4: NAIROBI, December 14, 2021 — Kenyas economy has demonstrated resilience to the \nL5: COVID-19 shock, with output in the first half of the year rising above pre-\nL6: pandemic levels. In 2021 as a whole, gross domestic product (GDP) is expected to\nL7: grow by 5%, one of the faster recoveries among Sub-Saharan African countries. \nL8: Overall economic performance is expected to be robust at 4.9% per year in \nL9: 2022-23, similar to the pre-pandemic pace (5% average annual growth from 2010 to\nL10: 2019). According to the 24th edition of the Kenya Economic Update, “From \nL11: Recovery to Better Jobs,” growth has been supported by rebounds in industry and,\nL12: especially, services. Agricultural output, however, fell by 0.5% year on year \nL13: in the first half of 2021 following a particularly strong performance in 2020, \nL14: partly due to below-average rains. Demand-side recovery has been supported by a \nL15: revival in private consumption, against a backdrop of improving employment \nL16: conditions and household incomes. “Kenyas economy has shown considerable \nL17: resilience to the enormous shock of the pandemic, and this year is expected to \nL18: post one of the stronger growth rebounds in the region thanks to diversified \nL19: sources of growth and sound economic policies and management,” said Keith \nL20: Hansen, World Bank Country Director for Kenya. 
“However, poverty has increased, \nL21: and the buffers and coping mechanisms of households, firms, and the public \nL22: finances have been depleted.” Economic activity in Kenya has continued to adapt \nL23: to the pandemic and associated restrictions. A mix of containment measures, such\nL24: as a nightly curfew, were in effect through most of 2021, while more \nL25: economically disruptive measures such as lockdowns and travel restrictions were \nL26: phased, limiting the impact on economic activities. The vaccine rollout, which \nL27: had a slow start due to supply constraints, has picked up as new shipments of \nL28: vaccines have arrived, particularly since September. This has supported economic\nL29: recovery and growth through the third quarter of 2021. As of December 5, 2021, \nL30: Kenya had received a total of 16,201,670 vaccines, with 7,583,134 administered. \nL31: While vaccine acceptance is reportedly high there is still a long way to go \nL32: towards the governments target of fully inoculating the adult population of \nL33: about 30 million by the end of 2022.As of December 6, 2021, about 10% of adults \nL34: (2.9 million people) had been fully vaccinated while another 16% (4.9 million \nL35: people) had received their first dose. The report also projects robust growth in\nL36: the medium-term. This outlook takes into account that some sub-sectors, such as\nL37: education, have bounced back strongly, but others such as international \nL38: tourism, have only partially and face a much more protracted recovery. The \nL39: anticipated further recovery of hotels and restaurants, trade, transport, and \nL40: other services, depends on substantial vaccination progress to help prevent new \nL41: waves of infections and associated containment measures. 
“Recent economic \nL42: performance has been strong and the outlook is positive, but in Kenya, as \nL43: everywhere, the future course of the pandemic remains a major source of \nL44: uncertainty, said Alex Sienaert, Senior Economist for Kenya. “To support further\nL45: recovery, reduce debt distress risks, and rebuild space for social and \nL46: development spending, it is critical that Kenya continues to implement medium-\nL47: term fiscal consolidation plans.” In addition to pandemic-related risks to the \nL48: outlook, a second key domestic risk factor stems from the drought conditions \nL49: which are affecting parts of the country and already causing severe hardship. \nL50: Should the drought intensify or spread, this would weigh on the near-term \nL51: economic outlook. Weaker global growth, higher-than-anticipated energy prices, \nL52: and tighter external financing conditions are the primary external risks. As ",
"tool_call_id": "fixture-tool-call-29-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Thus first clue confirmed.\n\nSecond clue: Defenders of Wildlife 2021 annual report. Confirmed.\n\nThird clue: Defenders of Wildlife 2022 annual report. Confirmed.\n\nThus the answer is Terry C. Pelster.\n\nBut let's verify that the 2022 annual report indeed shows \"Contributed services 10%\". We saw that. Also confirm that 2021 report shows 12%. We saw that.\n\nThus the question: \"Who was the emeritus board member that year?\" Refers to 2022. Answer: Terry C. Pelster.\n\nHowever, the question might be asking for the emeritus board member in the 2021 report? Let's read the question carefully:\n\n\"In a year before 2023, between December 1 and 15 (inclusive), a press release reported that as of the 5th of that month, a country had received a total of 16,201,670 vaccine doses, with 7,583,134 administered. That same year, a company's annual report revealed that its nearly 2.2 million members and supporters contributed over $49 million, with 12% of total revenue coming from contributed services. The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan to open most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling and that contributed services had decreased by 2% to 10%. Who was the emeritus board member that year? Name only, no titles.\"\n\nThe phrase \"that year\" could refer to \"the following year\" i.e., the year of the annual report that mentioned the rescinded plan. Or could refer to \"that same year\" (the year of the press release and the first annual report). However, the phrase \"that year\" appears at the end: \"Who was the emeritus board member that year?\" It likely refers to the year of the annual report that is being discussed in the preceding sentence. The preceding sentence talks about the following year's annual report. So indeed \"that year\" is the following year (2022).\n\nThus answer: Terry C. 
Pelster.\n\nBut to be thorough, we should check the 2022 annual report for any other emeritus board members. The search only showed \"BOARD EMERITUS Terry C. Pelster, Esq.\" Maybe there are multiple emeritus board members. But likely only one.\n\nCheck the 2021 annual report for emeritus board member. It also shows Terry C. Pelster. So maybe the question is asking for the emeritus board member in the 2021 report. But why would they ask that? The question is about connecting the clues to identify the organization and then find the emeritus board member. The clues point to Defenders of Wildlife. The question likely asks for the emeritus board member as of the 2022 report.\n\nNevertheless, we should confirm that the 2022 report is indeed the one that mentions the rescinded plan. We saw that. Also confirm that contributed services decreased from 12% to 10%. We saw that.\n\nThus answer is Terry C. Pelster.\n\nBut let's search for \"Defenders of Wildlife emeritus board member 2022 Terry Pelster\".",
"tool_calls": [
{
"id": "fixture-tool-call-31-0",
"type": "function",
"function": {
"name": "search",
"arguments": "{\"query\": \"Terry C. Pelster Defenders of Wildlife emeritus board member\", \"topn\": 10, \"source\": \"web\"}"
}
}
]
},
{
"role": "tool",
"content": "[15] Terry C. Pelster Defenders of Wildlife emeritus board member (Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member)\n**viewing lines [0 - 31] of 31**\n\nL0: \nL1: URL: Search_Results/Terry C. Pelster Defenders of Wildlife emeritus board member\nL2: # Search Results\nL3: \nL4: * 【0†2024 impact report; publish_date: none†defenders.org】 Terry Root, Ph.D. \nL5: New Mexico. Thomas D. Sisk, Ph.D. British Columbia, Canada. BOARD EMERITUS. \nL6: Terry C. Pelster, Esq. New York. SCIENCE ADVISORS. Leah Gerber, Ph ...\nL7: * 【1†Defenders of Wildlife; publish_date: none†www.wrongkindofgreen.org】 \nL8: Directors ; Victor M. Sher*, Chair California ; Terry C. Pelster*, Vice Chair \nL9: New York ; Richard Kopcho*, Treasurer California ; Adelaide P. Gomer*, \nL10: Secretary ...\nL11: * 【2†Exhibit 12; publish_date: none†www.uschamber.com】 DECLARATION OF TERRY \nL12: PELSTER. I, Terry C. Pelster, declare as follows: 1. I am a current member of \nL13: Defenders of Wildlife (“Defenders”) and have been a member.\nL14: * 【3†2020 ANNUAL REPORT; publish_date: none†defenders.org】 Terry L. Root. \nL15: Florida. BOARD. EMERITUS. Terry C. Pelster. New York. Alan Steinberg. Florida. \nL16: SCIENCE. ADVISORS. Gabriela Chavarria,. Ph.D. Denver Museum of.\nL17: * 【4†2019 Annual Report; publish_date: none†defenders.org】 BOARD EMERITUS. \nL18: Terry C. Pelster. New York. Alan Steinberg. Florida. SCIENCE ADVISORS. Leah \nL19: Gerber, Ph.D. Arizona State University. Kathayoon Khalil, Ph.D.\nL20: * 【5†annual report; publish_date: none†defenders.org】 * Executive Committee \nL21: member. BoARD EMERITuS. Terry C. Pelster. New York. Alan Steinberg. Florida. \nL22: PRESIDENT. EMERITuS. Rodger Schlickeisen. Virginia. SCIENCE ...\nL23: * 【6†BEING THERE FOR BELUGAS; publish_date: none†wendeenicole.com】 Mar 2, 2023\nL24: — Defenders of Wildlife is a national, nonprofit member- ship ... Terry C. \nL25: Pelster, Esq. New York. 
*Executive Committee member outfront.\nL26: * 【7†Annual Reports; publish_date: none†www.annualreports.com】 WAYS TO GIVE \nL27: There are many ways to support Defenders of Wildlife and protect our imperiled \nL28: wildlife. ... Terry C. Pelster. New York. Alan Steinberg. Florida.\nL29: * 【8†2014 Annual Report; publish_date: none†defenders.org】 BOARD OF DIRECTORS \nL30: defenders of wildlife is a national, nonprofit ... BOARD EMERITUS. Terry C. \nL31: Pelster. New York. Alan Steinberg. Florida. Page 3. PO.",
"tool_call_id": "fixture-tool-call-31-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Thus Terry C. Pelster is indeed the emeritus board member.\n\nNow the question might want just \"Terry C. Pelster\". Or maybe \"Terry Pelster\". Or \"Terry C. Pelster, Esq.\" but instructions say \"Name only, no titles.\" So likely \"Terry C. Pelster\". However, maybe they want \"Terry Pelster\". Better to include full name as listed: \"Terry C. Pelster\".\n\nLet's read the question again: \"Who was the emeritus board member that year? Name only, no titles.\" So we should answer just the name, no \"Esq.\" or \"Board Emeritus\". So \"Terry C. Pelster\". Or maybe \"Terry Pelster\". But the document shows \"Terry C. Pelster, Esq.\" So the name is \"Terry C. Pelster\". \n\nBetter to verify that the 2022 annual report indeed lists \"BOARD EMERITUS Terry C. Pelster, Esq.\" We saw that line.\n\nThus answer: Terry C. Pelster.\n\nBut let's double-check that the question might refer to the 2021 annual report. The phrase \"that year\" could be ambiguous. Let's parse the timeline:\n\n- Year 1: 2021. Press release in Dec 2021 about Kenya vaccine doses. Same year (2021) annual report of Defenders of Wildlife shows 12% contributed services.\n\n- Year 2: 2022. Annual report mentions rescinded plan and contributed services decreased to 10%.\n\nThe question says \"The following year, the annual report stated that the organization had played a role in rescinding a previous administrations plan ... and that contributed services had decreased by 2% to 10%.\" This is year 2.\n\nThen asks \"Who was the emeritus board member that year?\" That likely refers to year 2.\n\nThus answer: Terry C. Pelster.\n\nHowever, perhaps the emeritus board member changed between 2021 and 2022. We need to check 2022 annual report for any other emeritus board member. We saw only Terry C. Pelster. Also check 2021 annual report: same. So likely unchanged.\n\nThus answer is Terry C. 
Pelster.\n\nNevertheless, we should confirm that the 2022 annual report is indeed the one that includes the phrase \"BLM rescinded the previous administrations plan that opened most of the 22 million acres of the National Petroleum Reserve in Alaska to oil and gas drilling\". We saw that in line 200-202.\n\nThus answer is Terry C. Pelster.\n\nNow we need to provide the answer in the final message. Only the name.\n\nThus final answer: Terry C. Pelster.\n\nBut let's be thorough and search for \"Defenders of Wildlife 2022 annual report contributed services 10%\".",
"tool_calls": [
{
"id": "fixture-tool-call-33-0",
"type": "function",
"function": {
"name": "find",
"arguments": "{\"cursor\": 8, \"pattern\": \"Contributed services\"}"
}
}
]
},
{
"role": "tool",
"content": "[16] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2022.pdf/find?pattern=contributed services)\n**viewing lines [0 - 10] of 10**\n\nL0: # 【0†match at L222】\nL1: income $744 Contributed services $5,047 Total Revenue $41,170 USES OF FUNDS \nL2: Biodiversity conservation $18,970 Constituency mobilization $16,528 Fundraising \nL3: $1,029 Management and general $6,354 Total Expenses $42,881 Change in net assets\nL4: (-$1,711) Net assets, start of year $45,144 Net Assets, End of the Year $43,433\nL5: \nL6: # 【1†match at L227】\nL7: Contributed services 10% Royalties and other income 2% Bequests, trusts and \nL8: split interests 29% TOTAL REVENUE Income from investments -15% Program and \nL9: support services 98% USE OF FUNDS Fundraising 2% ===== Page 13 ===== \nL10: DEFENDERS.ORG 13 ===== Page 14 ===== BOARD OF DIRECTORS John Dayton, Esq.* Chair",
"tool_call_id": "fixture-tool-call-33-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Thus confirmed.\n\nNow we should verify that the 2021 annual report shows contributed services 12%.",
"tool_calls": [
{
"id": "fixture-tool-call-35-0",
"type": "function",
"function": {
"name": "find",
"arguments": "{\"cursor\": 4, \"pattern\": \"Contributed services\"}"
}
}
]
},
{
"role": "tool",
"content": "[17] Find results for text: `contributed services` in `www.annualreports.com` (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf/find?pattern=contributed services)\n**viewing lines [0 - 4] of 4**\n\nL0: # 【0†match at L194】\nL1: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL2: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL3: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL4: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 ",
"tool_call_id": "fixture-tool-call-35-0"
},
{
"role": "assistant",
"content": "",
"reasoning_content": "Open around L190-200 to see percentage.",
"tool_calls": [
{
"id": "fixture-tool-call-37-0",
"type": "function",
"function": {
"name": "open",
"arguments": "{\"cursor\": 4, \"loc\": 185}"
}
}
]
},
{
"role": "tool",
"content": "[18] www.annualreports.com (https://www.annualreports.com/HostedData/AnnualReportArchive/d/defenders-wildlife_2021.pdf)\n**viewing lines [185 - 232] of 261**\n\nL185: devastating water pollution that is killing the seagrass and causing manatees to\nL186: starve. DAVID TES | SAM FRENZY DRAW DEFENDERS.ORG 11 ===== Page 12 ===== In \nL187: 2021, Defenders of Wildlifes nearly 2.2 million members and supporters \nL188: contributed more than $49 million for wildlife and wildlife habitat. This figure\nL189: includes all those who generously donated their time and expertise. The \nL190: steadfast support of our donors allows Defenders to sustain our program and \nL191: public education efforts in the field, the courts and on Capitol Hill. 2021 \nL192: SOURCES OF FUNDS Grants and contributions $29,057 Bequests, trusts and split \nL193: interests $7,692 Income from investments, annuity reserve funds and trusts \nL194: $3,354 Royalties and other income $3,576 Contributed services $6,140 Total \nL195: Revenue $49,819 USES OF FUNDS Biodiversity conservation $22,420 Constituency \nL196: mobilization $16,324 Fundraising $1,211 Management and general $5,865 Total \nL197: Expenses $45,820 Change in net assets $3,999 Net assets, start of year $41,145 \nL198: Net Assets, End of the Year $45,144 Dollars are in thousands. 12 DEFENDERS OF \nL199: WILDLIFE Grants and contributions 58% Income from investments 7% Requests, \nL200: trusts and split interests 15% Royalties and other income 7% Contributed \nL201: services 12% Program and support services 97% Fundraising 3% ===== Page 13 =====\nL202: DEFENDERS.ORG 13 ===== Page 14 ===== John Dayton, Esq.* Chair Texas Sheryl \nL203: Sachs* Vice Chair Connecticut Whitfield Bailey* Secretary Tennessee Mark Caylor*\nL204: Treasurer District of Columbia Sylvia Baca* California Donald Barry, Esq. \nL205: Oregon Dinah Bear, Esq. 
Arizona Kim OKeefe Beck, Ph.D California Gabriela \nL206: Chavarria, Ph.D Washington Jamie Rappaport Clark* President & CEO Virginia \nL207: Caroline D. Gabel Maryland Keith M. Harper, Esq. Maryland John Donovan, Esq. \nL208: Montana and California James M. Hecker, Esq. Virginia Judith Ponsikoff, Ph.D* \nL209: California Terry L. Root, Ph.D Florida BOARD EMERITUS Terry C. Pelster, Esq. New\nL210: York *Executive Committee member* ST A F F & A D V I S O R S SCIENCE ADVISORS \nL211: Gabriela Chavarria, Ph.D Burke Museum, University of Washington Leah Gerber, \nL212: Ph.D Arizona State University Kathayoon Khalil, Ph.D Oregon Zoo Rurik List, Ph.D\nL213: Universidad Autónoma Metropolitana-Lerma Barry Noon, Ph.D Colorado State \nL214: University Terry L. Root, Ph.D Stanford University Mark Schwartz, Ph.D \nL215: University of California-Davis David S. Wilcox, Ph.D Princeton University Rae \nL216: Wynn-Grant, Ph.D National Geographic Society SENIOR STAFF Jamie Rappaport Clark \nL217: President & CEO Thu Pham Chief of Staff Renee Stone Senior Vice President, \nL218: Conservation Programs & General Counsel James Stefan Senior Vice President, \nL219: Operations Matt Braughler Vice President, Philanthropy Rachel Brittin Vice \nL220: President, Communications Shawn Cantrell Vice President, Field Conservation \nL221: Programs Robert L. Dewey Vice President, Government Relations & External Affairs\nL222: Monica Goldberg Vice President, Landscape Conservation Sharon Morris-Smith Vice\nL223: President of Human Resources Elizabeth Nichols Vice President, Finance/CFO \nL224: Anthony Pegues Vice President, Diversity, Equity, Inclusion, Justice Lindsay \nL225: Rosa, Ph.D Director, Center for Conservation Innovation Michael P. Senatore Vice\nL226: President, Conservation Law Brandace Swimley Vice President, Integrated \nL227: Marketing 14 DEFENDERS OF WILDLIFE ===== Page 15 ===== THERE ARE MANY WAYS TO \nL228: SUPPORT DEFENDERS OF WILDLIFE and protect our imperiled wildlife. 
To learn more \nL229: or donate, contact us at 1-800-385-9712, visit defenders.org/donate or choose \nL230: one of the following options. ➤ Transfer appreciated assets directly to \nL231: Defenders and maximize potential tax benefits. Defenders.org/donate-stocks-help-\nL232: save-wildlife ➤ Become a Leadership Council member with a single donation of ",
"tool_call_id": "fixture-tool-call-37-0"
},
{
"role": "assistant",
"content": "Terry C. Pelster",
"reasoning_content": "Thus everything matches.\n\nThus answer: Terry C. Pelster.\n\nNow we need to output only the name.\n\nThus final answer: Terry C. Pelster.\n\nBut perhaps the question expects \"Terry Pelster\". However, the document includes middle initial \"C.\" It might be better to include the full name as listed: \"Terry C. Pelster\". The instruction \"Name only, no titles\" suggests we should not include \"Esq.\" or \"Board Emeritus\". But including middle initial is likely okay.\n\nTo be safe, we could output \"Terry C. Pelster\". Let's see similar puzzles: often they expect the full name as appears in the document.\n\nThus answer: Terry C. Pelster.\n\nNow we will provide final answer.",
"tool_calls": []
}
]
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,112 @@
<begin▁of▁sentence>
## Tools
You have access to a set of tools you can use to answer the user's question.
You can invoke functions by writing a "<DSMLfunction_calls>" block like the following as part of your reply to the user:
<DSMLfunction_calls>
<DSMLinvoke name="$FUNCTION_NAME">
<DSMLparameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</DSMLparameter>
...
</DSMLinvoke>
<DSMLinvoke name="$FUNCTION_NAME2">
...
</DSMLinvoke>
</DSMLfunction_calls>
String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
<DSMLfunction_calls>
...
</DSMLfunction_calls>
<function_results>
...
</function_results>
<think>...thinking about results</think>
Here are the functions available in JSONSchema format:
<functions>
{"name": "get_datetime", "description": "Get the current date and time", "parameters": {"type": "object", "properties": {"timezone": {"type": "string", "description": "The timezone, e.g. Asia/Shanghai, UTC"}}, "required": ["timezone"]}}
{"name": "get_weather", "description": "Get the weather for a specific date and location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name, e.g. Beijing, Hangzhou"}, "date": {"type": "string", "description": "The date in YYYY-MM-DD format"}}, "required": ["location", "date"]}}
</functions>
You are a helpful Assistant.<User>明天杭州和北京的天气怎么样?<Assistant></think>
<DSMLfunction_calls>
<DSMLinvoke name="get_datetime">
<DSMLparameter name="timezone" string="true">Asia/Shanghai</DSMLparameter>
</DSMLinvoke>
</DSMLfunction_calls><end▁of▁sentence>
<function_results>
<result>{"current_date": "2024-01-15", "current_time": "14:30:00", "timezone": "Asia/Shanghai"}</result>
</function_results>
</think>
<DSMLfunction_calls>
<DSMLinvoke name="get_weather">
<DSMLparameter name="location" string="true">Hangzhou</DSMLparameter>
<DSMLparameter name="date" string="true">2024-01-16</DSMLparameter>
</DSMLinvoke>
<DSMLinvoke name="get_weather">
<DSMLparameter name="location" string="true">Beijing</DSMLparameter>
<DSMLparameter name="date" string="true">2024-01-16</DSMLparameter>
</DSMLinvoke>
</DSMLfunction_calls><end▁of▁sentence>
<function_results>
<result>{"location": "Hangzhou", "date": "2024-01-16", "temperature_high": "12", "temperature_low": "5", "weather": "多云", "humidity": "65%"}</result>
<result>{"location": "Beijing", "date": "2024-01-16", "temperature_high": "-2", "temperature_low": "-8", "weather": "晴", "humidity": "30%"}</result>
</function_results>
</think>根据查询结果,明天(2024年1月16日)的天气情况如下:
**杭州**
- 天气:多云
- 最高温度:12°C
- 最低温度:5°C
- 湿度:65%
**北京**
- 天气:晴
- 最高温度:-2°C
- 最低温度:-8°C
- 湿度:30%
杭州明天会比较温暖但有些多云,而北京会很冷但是晴天。建议在北京的朋友要注意保暖!<|end▁of▁sentence><User>谢谢!那后天呢?<Assistant><think>用户现在问后天的天气,后天是2024-01-17,我可以直接查询(因为已知今天日期)🗓️</think>
<DSMLfunction_calls>
<DSMLinvoke name="get_weather">
<DSMLparameter name="location" string="true">Hangzhou</DSMLparameter>
<DSMLparameter name="date" string="true">2024-01-17</DSMLparameter>
</DSMLinvoke>
<DSMLinvoke name="get_weather">
<DSMLparameter name="location" string="true">Beijing</DSMLparameter>
<DSMLparameter name="date" string="true">2024-01-17</DSMLparameter>
</DSMLinvoke>
</DSMLfunction_calls><end▁of▁sentence>
<function_results>
<result>{"location": "Hangzhou", "date": "2024-01-17", "temperature_high": "15", "temperature_low": "8", "weather": "小雨", "humidity": "80%"}</result>
<result>{"location": "Beijing", "date": "2024-01-17", "temperature_high": "0", "temperature_low": "-6", "weather": "多云", "humidity": "45%"}</result>
</function_results>
<think>获取到后天的天气数据,整理回复给用户📝</think>后天(2024年1月17日)的天气情况:
**杭州**
- 天气:小雨
- 最高温度:15°C
- 最低温度:8°C
- 湿度:80%
**北京**
- 天气:多云
- 最高温度:0°C
- 最低温度:-6°C
- 湿度:45%
杭州后天会有小雨,温度略有回升,记得带伞。北京会稍微暖和一点,但依然很冷,请继续做好保暖措施。<|end▁of▁sentence>
@@ -0,0 +1,31 @@
mod encoding;
use vllm_text::Prompt;
use super::{ChatRenderer, RenderedPrompt};
use crate::Result;
use crate::request::ChatRequest;
/// Chat renderer dedicated to the DeepSeek V3.2 prompt format.
///
/// This is a unit struct with no fields, so it is trivially copyable and can
/// be obtained through either [`DeepSeekV32ChatRenderer::new`] or
/// `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DeepSeekV32ChatRenderer;

impl DeepSeekV32ChatRenderer {
    /// Construct the DeepSeek V3.2 renderer.
    pub fn new() -> Self {
        DeepSeekV32ChatRenderer
    }
}
impl ChatRenderer for DeepSeekV32ChatRenderer {
    /// Render `request` into a text prompt.
    ///
    /// The request is validated first; a validation failure or an error from
    /// `encoding::render_request` is returned to the caller unchanged.
    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
        request.validate()?;
        let text = encoding::render_request(request)?;
        let prompt = Prompt::Text(text);
        Ok(RenderedPrompt { prompt })
    }
}
#[cfg(test)]
mod tests;
@@ -0,0 +1,422 @@
use std::fs;
use std::path::PathBuf;
use expect_test::{ExpectFile, expect, expect_file};
use serde::Deserialize;
use serde_json::{Value, json};
use thiserror_ext::AsReport;
use super::DeepSeekV32ChatRenderer;
use crate::error::Error;
use crate::event::{AssistantContentBlock, AssistantToolCall};
use crate::request::{
ChatContentPart, ChatMessage, ChatRequest, ChatTool, ChatToolChoice, GenerationPromptMode,
};
use crate::{ChatRenderer, ChatRole};
/// Top-level shape of a JSON fixture file.
///
/// `fixture_request` deserializes a fixture into this type before converting
/// it into a `ChatRequest`.
#[derive(Debug, Deserialize)]
struct FixtureRequest {
/// Request-level tool definitions; an absent key deserializes to an empty list.
#[serde(default)]
tools: Vec<FixtureTool>,
/// Conversation messages, in the order they appear in the fixture.
messages: Vec<FixtureMessage>,
}
/// One tool entry in a fixture. Only the `function` key is modeled here.
#[derive(Debug, Deserialize)]
struct FixtureTool {
/// The function definition carried by this tool entry.
function: FixtureToolFunction,
}
/// Function definition of a fixture tool.
///
/// `to_chat_tools` copies each of these fields into the matching `ChatTool`
/// field.
#[derive(Debug, Deserialize)]
struct FixtureToolFunction {
/// Function name.
name: String,
/// Optional human-readable description.
description: Option<String>,
/// Parameter definition, kept as an arbitrary JSON value.
parameters: Value,
/// Optional `strict` flag; `None` when the key is absent.
#[serde(default)]
strict: Option<bool>,
}
/// One conversation message in a fixture.
///
/// The variant is selected by the JSON `role` field, matched against the
/// snake_case variant name (`system`, `developer`, `user`, `assistant`,
/// `tool`).
#[derive(Debug, Deserialize)]
#[serde(tag = "role", rename_all = "snake_case")]
enum FixtureMessage {
/// System message with plain text content.
System {
content: String,
},
/// Developer message, optionally carrying its own tool definitions.
Developer {
content: String,
// An absent `tools` key deserializes to an empty list.
#[serde(default)]
tools: Vec<FixtureTool>,
},
/// User message with plain text content.
User {
content: String,
},
/// Assistant message. Every field is optional in the JSON and defaults to
/// an empty string / empty list when absent.
Assistant {
#[serde(default)]
content: String,
#[serde(default)]
reasoning_content: String,
#[serde(default)]
tool_calls: Vec<FixtureToolCall>,
},
/// Tool response message.
Tool {
content: String,
// `None` when absent; `fixture_request` then substitutes a generated id.
#[serde(default)]
tool_call_id: Option<String>,
},
}
/// One tool call attached to an assistant message in a fixture.
#[derive(Debug, Deserialize)]
struct FixtureToolCall {
/// Tool call id; when absent, `fixture_request` generates
/// `fixture-tool-call-{index}-{tool_index}` instead.
#[serde(default)]
id: Option<String>,
/// The function being invoked.
function: FixtureToolCallFunction,
}
/// Function name and arguments of a fixture tool call.
#[derive(Debug, Deserialize)]
struct FixtureToolCallFunction {
/// Name of the invoked function.
name: String,
/// Arguments as a string; `fixture_request` passes it through unchanged.
arguments: String,
}
/// Render `request` with the DeepSeek V3.2 renderer and return the prompt text.
///
/// Panics if rendering fails or if the rendered prompt is not a text prompt.
fn render_request(request: &ChatRequest) -> String {
    let renderer = DeepSeekV32ChatRenderer::new();
    let rendered = renderer.render(request).unwrap();
    rendered
        .prompt
        .into_text()
        .expect("deepseek renderer should return text prompt")
}
/// Render `request` with the DeepSeek V3.2 renderer, returning render errors
/// to the caller instead of panicking.
///
/// Still panics if a successfully rendered prompt is not a text prompt.
fn render_result(request: &ChatRequest) -> Result<String, Error> {
    let rendered = DeepSeekV32ChatRenderer::new().render(request)?;
    let text = rendered
        .prompt
        .into_text()
        .expect("deepseek renderer should return text prompt");
    Ok(text)
}
/// Build a thinking-enabled test request from `messages`.
///
/// The request starts from `ChatRequest::for_test()` with a fixed request id
/// and the `thinking` template kwarg set to `true`. When the conversation
/// already ends with an assistant message, the generation prompt mode is set
/// to `GenerationPromptMode::NoGenerationPrompt`.
fn thinking_request(messages: Vec<ChatMessage>) -> ChatRequest {
    // Inspect the last message before `messages` is moved into the request.
    let ends_with_assistant = matches!(
        messages.last().map(ChatMessage::role),
        Some(ChatRole::Assistant)
    );

    let mut request = ChatRequest {
        request_id: String::from("deepseek-v32-small-test"),
        messages,
        ..ChatRequest::for_test()
    };

    let options = &mut request.chat_options;
    if ends_with_assistant {
        options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
    }
    options
        .template_kwargs
        .insert(String::from("thinking"), Value::Bool(true));

    request
}
fn fixture_request(input_name: &str) -> ChatRequest {
let fixture = fs::read_to_string(fixture_path(input_name)).unwrap();
let fixture: FixtureRequest = serde_json::from_str(&fixture).unwrap();
let mut request = ChatRequest {
request_id: "deepseek-v32-fixture".to_string(),
messages: fixture
.messages
.into_iter()
.enumerate()
.map(|(index, message)| match message {
FixtureMessage::System { content } => ChatMessage::system(content),
FixtureMessage::Developer { content, tools } => ChatMessage::developer(
content,
(!tools.is_empty()).then(|| to_chat_tools(&tools)),
),
FixtureMessage::User { content } => ChatMessage::user(content),
FixtureMessage::Assistant {
content,
reasoning_content,
tool_calls,
} => {
let mut blocks = Vec::new();
if !reasoning_content.is_empty() {
blocks.push(AssistantContentBlock::Reasoning {
text: reasoning_content,
});
}
if !content.is_empty() {
blocks.push(AssistantContentBlock::Text { text: content });
}
blocks.extend(tool_calls.into_iter().enumerate().map(
|(tool_index, tool_call)| {
AssistantContentBlock::ToolCall(AssistantToolCall {
id: tool_call.id.unwrap_or_else(|| {
format!("fixture-tool-call-{index}-{tool_index}")
}),
name: tool_call.function.name,
arguments: tool_call.function.arguments,
})
},
));
ChatMessage::assistant_blocks(blocks)
}
FixtureMessage::Tool {
content,
tool_call_id,
} => ChatMessage::tool_response(
content,
tool_call_id.unwrap_or_else(|| format!("fixture-tool-response-{index}")),
),
})
.collect(),
tools: to_chat_tools(&fixture.tools),
tool_choice: if fixture.tools.is_empty() {
ChatToolChoice::None
} else {
ChatToolChoice::Auto
},
..ChatRequest::for_test()
};
if matches!(
request.messages.last().map(ChatMessage::role),
Some(ChatRole::Assistant)
) {
request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
}
request
.chat_options
.template_kwargs
.insert("thinking".to_string(), Value::Bool(true));
request
}
/// Copies each fixture tool's function definition into a `ChatTool`,
/// preserving order.
fn to_chat_tools(tools: &[FixtureTool]) -> Vec<ChatTool> {
    let mut converted = Vec::with_capacity(tools.len());
    for tool in tools {
        let function = &tool.function;
        converted.push(ChatTool {
            name: function.name.clone(),
            description: function.description.clone(),
            parameters: function.parameters.clone(),
            strict: function.strict,
        });
    }
    converted
}
/// Resolves `name` inside this renderer's `fixtures` directory, rooted at the
/// crate manifest directory.
fn fixture_path(name: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("src/renderer/deepseek_v32");
    path.push("fixtures");
    path.push(name);
    path
}
/// Renders the fixture `input_name` and compares the prompt against the
/// `expected` snapshot file.
fn assert_fixture(input_name: &str, expected: ExpectFile) {
    let prompt = render_request(&fixture_request(input_name));
    expected.assert_eq(&prompt);
}
/// Snapshot test for the request-level-tools fixture.
#[test]
fn renders_vllm_parity_prompt_for_request_level_tools_fixture() {
    let expected = expect_file!["fixtures/test_output_vllm_parity.txt"];
    assert_fixture("test_input.json", expected);
}
/// Snapshot test for the search fixture without a date.
#[test]
fn renders_official_search_fixture_without_date() {
    let expected = expect_file!["fixtures/test_output_search_wo_date.txt"];
    assert_fixture("test_input_search_wo_date.json", expected);
}
/// Snapshot test for the search fixture with a date.
#[test]
fn renders_official_search_fixture_with_date() {
    let expected = expect_file!["fixtures/test_output_search_w_date.txt"];
    assert_fixture("test_input_search_w_date.json", expected);
}
/// Request-level tools are rendered ahead of the system prompt text, and the
/// prompt ends by opening a thinking assistant turn.
#[test]
fn request_level_tools_are_lowered_as_synthetic_leading_system_message() {
    let lookup_tool = ChatTool {
        name: "lookup".to_string(),
        description: Some("Look things up".to_string()),
        parameters: json!({
            "type": "object",
            "properties": { "query": { "type": "string" } },
            "required": ["query"]
        }),
        strict: None,
    };
    let mut request = ChatRequest {
        request_id: "deepseek-v32-tools".to_string(),
        messages: vec![
            ChatMessage::system("System prompt."),
            ChatMessage::text(ChatRole::User, "Hello"),
        ],
        tools: vec![lookup_tool],
        tool_choice: ChatToolChoice::Auto,
        ..ChatRequest::for_test()
    };
    let kwargs = &mut request.chat_options.template_kwargs;
    kwargs.insert("thinking".to_string(), Value::Bool(true));

    let prompt = render_request(&request);

    assert!(prompt.starts_with("<begin▁of▁sentence>\n\n## Tools\n"));
    assert!(prompt.contains("</functions>\nSystem prompt."));
    assert!(prompt.ends_with("<User>Hello<Assistant><think>"));
}
/// A lone developer message is rendered as a user-like turn and followed by a
/// thinking assistant prompt.
#[test]
fn developer_turn_is_treated_as_last_user_like_turn() {
    let messages = vec![ChatMessage::developer("Follow policy.", None)];
    let prompt = render_request(&thinking_request(messages));

    assert!(prompt.contains("# The user's message is: Follow policy."));
    assert!(prompt.ends_with("<Assistant><think>"));
}
/// Reasoning from an assistant turn that precedes the final user turn must
/// not appear in the prompt; its visible text must.
#[test]
fn historical_assistant_reasoning_is_dropped_before_final_user_turn() {
    let earlier_answer = ChatMessage::assistant_blocks(vec![
        AssistantContentBlock::Reasoning {
            text: "internal reasoning".to_string(),
        },
        AssistantContentBlock::Text {
            text: "Visible answer.".to_string(),
        },
    ]);
    let follow_up = ChatMessage::user("What about the next one?");

    let prompt = render_request(&thinking_request(vec![earlier_answer, follow_up]));

    assert!(!prompt.contains("internal reasoning"));
    assert!(prompt.contains("Visible answer.<end▁of▁sentence>"));
    assert!(prompt.ends_with("<User>What about the next one?<Assistant><think>"));
}
/// Same as the user-turn variant, but the final user-like turn is a developer
/// message.
#[test]
fn historical_assistant_reasoning_is_dropped_before_final_developer_turn() {
    let earlier_answer = ChatMessage::assistant_blocks(vec![
        AssistantContentBlock::Reasoning {
            text: "internal reasoning".to_string(),
        },
        AssistantContentBlock::Text {
            text: "Visible answer.".to_string(),
        },
    ]);
    let rubric = ChatMessage::developer("Follow the rubric.", None);

    let prompt = render_request(&thinking_request(vec![earlier_answer, rubric]));

    assert!(!prompt.contains("internal reasoning"));
    assert!(prompt.contains("Visible answer.<end▁of▁sentence>"));
    let expected_tail = "<User>\n\n# The user's message is: Follow the rubric.<Assistant><think>";
    assert!(prompt.ends_with(expected_tail));
}
/// After tool results that follow the last user turn, the prompt re-opens a
/// thinking section.
#[test]
fn tool_results_after_last_user_resume_thinking() {
    let weather_call = AssistantToolCall {
        id: "call-weather".to_string(),
        name: "weather".to_string(),
        arguments: "{\"city\":\"Hangzhou\"}".to_string(),
    };
    let messages = vec![
        ChatMessage::user("Check the weather."),
        ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(weather_call)]),
        ChatMessage::tool_response("{\"ok\":true}", "call-weather"),
    ];

    let prompt = render_request(&thinking_request(messages));

    let expected_call_prefix =
        "<User>Check the weather.<Assistant><think></think>\n\n<DSMLfunction_calls>";
    assert!(prompt.contains(expected_call_prefix));
    assert!(prompt.ends_with("</function_results>\n\n<think>"));
}
/// Tool results are emitted in the order of the assistant's tool calls, not
/// in the order the responses arrive.
#[test]
fn tool_results_follow_assistant_tool_call_id_order() {
    let call = |id: &str, arguments: &str| {
        AssistantContentBlock::ToolCall(AssistantToolCall {
            id: id.to_string(),
            name: "weather".to_string(),
            arguments: arguments.to_string(),
        })
    };
    let messages = vec![
        ChatMessage::user("Check two cities."),
        ChatMessage::assistant_blocks(vec![
            call("call-hangzhou", "{\"city\":\"Hangzhou\"}"),
            call("call-beijing", "{\"city\":\"Beijing\"}"),
        ]),
        // Responses arrive in the opposite order of the calls.
        ChatMessage::tool_response("{\"city\":\"Beijing\"}", "call-beijing"),
        ChatMessage::tool_response("{\"city\":\"Hangzhou\"}", "call-hangzhou"),
    ];

    let prompt = render_request(&thinking_request(messages));

    let expected_results = "<function_results>\n<result>{\"city\":\"Hangzhou\"}</result>\n<result>{\"city\":\"Beijing\"}</result>\n</function_results>";
    assert!(prompt.contains(expected_results));
}
/// A tool response whose id matches no assistant tool call is rejected.
#[test]
fn tool_results_require_matching_tool_call_ids() {
    let weather_call = AssistantToolCall {
        id: "call-weather".to_string(),
        name: "weather".to_string(),
        arguments: "{\"city\":\"Hangzhou\"}".to_string(),
    };
    let messages = vec![
        ChatMessage::user("Check the weather."),
        ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(weather_call)]),
        ChatMessage::tool_response("{\"ok\":true}", "call-unknown"),
    ];

    let error = render_result(&thinking_request(messages)).unwrap_err();
    let report = error.to_report_string();

    expect!["chat template error: invalid DeepSeek V3.2 tool message: unknown tool_call_id `call-unknown`"]
        .assert_eq(&report);
}
/// A text-only assistant message after the last user turn is rejected.
#[test]
fn assistant_after_last_user_requires_reasoning_or_tool_calls() {
    let messages = vec![
        ChatMessage::user("Hello"),
        ChatMessage::assistant_text("Hi there."),
    ];

    let error = render_result(&thinking_request(messages)).unwrap_err();
    let report = error.to_report_string();

    expect!["chat template error: invalid DeepSeek V3.2 assistant message after last user message: expected reasoning or tool calls"]
        .assert_eq(&report);
}
/// Image content is reported as unsupported multimodal input.
#[test]
fn render_rejects_multimodal_input() {
    let image = ChatContentPart::image_url("data:image/png;base64,test");
    let request = ChatRequest {
        messages: vec![ChatMessage::user(vec![image])],
        ..ChatRequest::for_test()
    };

    let renderer = DeepSeekV32ChatRenderer::new();
    let error = renderer.render(&request).unwrap_err();

    let is_expected = matches!(error, Error::UnsupportedMultimodalContent("image_url"));
    assert!(is_expected);
}
@@ -0,0 +1,558 @@
//! DeepSeek V4 prompt renderer.
//!
//! Original Python implementation:
//! <https://github.com/vllm-project/vllm/blob/main/vllm/tokenizers/deepseek_v4_encoding.py>
use std::collections::HashMap;
use std::fmt::Write as _;
use serde::Serialize;
use serde_json::Value;
use serde_json_fmt::JsonFormat;
use crate::error::{Error, Result};
use crate::request::{ChatContent, ChatMessage, ChatRequest, ChatTool, ReasoningEffort};
use crate::{AssistantContentBlock, AssistantMessageExt, AssistantToolCall};
/// Written once at the very start of every rendered prompt.
const BOS_TOKEN: &str = "<begin▁of▁sentence>";
/// Appended after an assistant turn unless that turn is being continued.
const EOS_TOKEN: &str = "<end▁of▁sentence>";
/// Emitted after the assistant marker when the upcoming turn should open a
/// reasoning section.
const THINKING_START_TOKEN: &str = "<think>";
/// Closes a reasoning section; also emitted directly after the assistant
/// marker when no reasoning section is opened.
const THINKING_END_TOKEN: &str = "</think>";
/// Prefix of the `invoke` / `parameter` tag names written for assistant tool
/// calls.
const DSML_TOKEN: &str = "DSML";
/// Opens every user-like turn (user, developer, and merged tool results).
const USER_SP_TOKEN: &str = "<User>";
/// Marks the start of an assistant turn.
const ASSISTANT_SP_TOKEN: &str = "<Assistant>";
/// Preamble inserted right after the BOS token when thinking is enabled and
/// the requested reasoning effort is `Max` or `XHigh`.
const REASONING_EFFORT_MAX: &str = concat!(
"Reasoning Effort: Absolute maximum with no shortcuts permitted.\n",
"You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.\n",
"Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.\n\n",
);
/// Whether the prompt is rendered with reasoning sections.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ThinkingMode {
// Thinking is disabled, or the reasoning effort is `None`.
Chat,
// Thinking is enabled for the request.
Thinking,
}
/// Borrowed view of a `ChatTool` in the shape serialized into the prompt.
///
/// Field order here is the key order of the emitted JSON object; `None`
/// fields are omitted via `skip_serializing_none`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Serialize)]
struct RenderedToolSchema<'a> {
name: &'a str,
description: Option<&'a str>,
parameters: &'a Value,
strict: Option<bool>,
}
/// Render one chat request into the final prompt string.
///
/// The prompt starts with the BOS token, optionally followed by the
/// max-reasoning-effort preamble, then every message in order. A run of
/// consecutive tool responses is rendered once, at the first message of the
/// run.
///
/// Returns an error if option resolution or any per-message renderer fails.
pub(super) fn render_request(request: &ChatRequest) -> Result<String> {
let (thinking_mode, max_reasoning_effort) = resolve_thinking_options(request)?;
let request_tools = request_tools(request);
let synthetic_tool_system = needs_synthetic_tool_system(request, request_tools);
// `drop_thinking` defaults to true and is forced off whenever any tool
// schemas are rendered.
let drop_thinking = request.parse_template_bool("drop_thinking")?.unwrap_or(true)
&& !rendered_tools_present(request, request_tools);
// Render-order position of the last user-like entry (-1 if none); decides
// which assistant turns keep their reasoning.
let last_user_render_index =
find_last_user_render_index(request.messages.as_slice(), synthetic_tool_system);
let mut out = String::from(BOS_TOKEN);
if thinking_mode == ThinkingMode::Thinking && max_reasoning_effort {
out.push_str(REASONING_EFFORT_MAX);
}
let mut request_tools_attached = false;
// Counts rendered entries (the synthetic tool entry plus one per
// non-skipped message), using the same numbering as
// `find_last_user_render_index`.
let mut render_index = 0isize;
if synthetic_tool_system {
// No system message exists, so request-level tools get their own leading,
// content-less system entry.
render_system_message(&mut out, None, request_tools)?;
request_tools_attached = true;
render_index += 1;
}
for (message_index, message) in request.messages.iter().enumerate() {
// Later members of a tool-response run were already emitted together with
// the run's first message.
if is_following_tool_response(request.messages.as_slice(), message_index) {
continue;
}
let current_render_index = render_index;
render_index += 1;
match message {
ChatMessage::System { content } => {
// Request-level tools are attached to the first system message only.
let tools = if !request_tools_attached {
request_tools_attached = true;
request_tools
} else {
&[]
};
render_system_message(&mut out, Some(content), tools)?;
}
ChatMessage::Developer { content, tools } => {
render_developer_message(&mut out, content, tools.as_deref().unwrap_or(&[]))?;
}
ChatMessage::User { content } => render_user_message(&mut out, content)?,
ChatMessage::Assistant { content } => {
// Mirror Python: thinking block (reasoning + </think>) is
// emitted whenever thinking is active and reasoning isn't
// dropped - i.e. drop_thinking is off OR this turn lies
// strictly after the last user turn.
let emit_thinking_block = thinking_mode == ThinkingMode::Thinking
&& (!drop_thinking || current_render_index > last_user_render_index);
// EOS is omitted only for the last message when
// `continue_final_message()` is true.
let append_eos = !(message_index + 1 == request.messages.len()
&& request.chat_options.continue_final_message());
render_assistant_message(&mut out, emit_thinking_block, append_eos, content)?;
}
ChatMessage::ToolResponse { .. } => {
render_tool_response_block(&mut out, request.messages.as_slice(), message_index)?;
}
}
// After a user-like entry that is followed by an assistant turn (or that
// ends the conversation), write the assistant transition.
if is_user_like_entry(message)
&& next_rendered_entry_is_assistant_or_end(request.messages.as_slice(), message_index)
{
write_assistant_transition(
&mut out,
thinking_mode,
drop_thinking,
current_render_index >= last_user_render_index,
);
}
}
Ok(out)
}
/// Derive `(thinking_mode, max_reasoning_effort)` for a request.
///
/// Thinking is on only when `enable_thinking()` yields `true` and the typed
/// `reasoning_effort` is not `None`. The second element is `true` for the
/// `Max` and `XHigh` efforts. Unlike the Python tokenizer wrapper, only the
/// typed top-level `reasoning_effort` is consulted; the generic
/// template-kwargs map is left for HF templates.
fn resolve_thinking_options(request: &ChatRequest) -> Result<(ThinkingMode, bool)> {
    let thinking_enabled = request.enable_thinking()?.unwrap_or(false);
    let effort = &request.chat_options.reasoning_effort;

    let effort_disables_thinking = matches!(effort, Some(ReasoningEffort::None));
    let thinking_mode = if thinking_enabled && !effort_disables_thinking {
        ThinkingMode::Thinking
    } else {
        ThinkingMode::Chat
    };
    let max_reasoning_effort = matches!(
        effort,
        Some(ReasoningEffort::Max | ReasoningEffort::XHigh)
    );

    Ok((thinking_mode, max_reasoning_effort))
}
/// Request-level tools, or an empty slice when native tool parsing is
/// disabled for this request.
fn request_tools(request: &ChatRequest) -> &[ChatTool] {
    if !request.tool_parsing_enabled() {
        return &[];
    }
    request.tools.as_slice()
}
/// True when there are request tools to render but the conversation has no
/// system message to attach them to.
fn needs_synthetic_tool_system(request: &ChatRequest, request_tools: &[ChatTool]) -> bool {
    if request_tools.is_empty() {
        return false;
    }
    let has_system_message = request
        .messages
        .iter()
        .any(|message| matches!(message, ChatMessage::System { .. }));
    !has_system_message
}
/// True when tool schemas will appear in the prompt, either from the request
/// or from a developer message that carries a non-empty tool list.
fn rendered_tools_present(request: &ChatRequest, request_tools: &[ChatTool]) -> bool {
    if !request_tools.is_empty() {
        return true;
    }
    request.messages.iter().any(|message| match message {
        ChatMessage::Developer {
            tools: Some(tools), ..
        } => !tools.is_empty(),
        _ => false,
    })
}
/// Render-order index of the last user-like entry, or `-1` if there is none.
///
/// Render indices start at 1 when a synthetic tool system entry is emitted
/// and at 0 otherwise. Tool responses that continue a run are skipped and do
/// not consume an index.
fn find_last_user_render_index(messages: &[ChatMessage], synthetic_tool_system: bool) -> isize {
    let first_render_index = isize::from(synthetic_tool_system);
    messages
        .iter()
        .enumerate()
        .filter(|&(message_index, _)| !is_following_tool_response(messages, message_index))
        // Number the surviving entries consecutively in render order.
        .zip(first_render_index..)
        .filter(|&((_, message), _)| is_user_like_entry(message))
        .map(|(_, render_index)| render_index)
        .last()
        .unwrap_or(-1)
}
/// True when the message at `message_index` is a tool response directly
/// preceded by another tool response, i.e. it is not the first of its run.
fn is_following_tool_response(messages: &[ChatMessage], message_index: usize) -> bool {
    let is_tool_response =
        |index: usize| matches!(messages[index], ChatMessage::ToolResponse { .. });
    is_tool_response(message_index) && message_index > 0 && is_tool_response(message_index - 1)
}
/// Developer, user, and tool-response messages count as user-like; every
/// other message kind does not.
fn is_user_like_entry(message: &ChatMessage) -> bool {
    match message {
        ChatMessage::Developer { .. }
        | ChatMessage::User { .. }
        | ChatMessage::ToolResponse { .. } => true,
        _ => false,
    }
}
/// True when the entry rendered after `message_index` is an assistant message
/// or when nothing follows.
///
/// If the current message is a tool response, the rest of its run is skipped
/// first, because the run is rendered as a single entry.
fn next_rendered_entry_is_assistant_or_end(messages: &[ChatMessage], message_index: usize) -> bool {
    let current_is_tool_response =
        matches!(messages[message_index], ChatMessage::ToolResponse { .. });
    let mut remaining = messages[message_index + 1..].iter();

    let next_entry = if current_is_tool_response {
        remaining.find(|message| !matches!(**message, ChatMessage::ToolResponse { .. }))
    } else {
        remaining.next()
    };

    match next_entry {
        Some(ChatMessage::Assistant { .. }) | None => true,
        Some(_) => false,
    }
}
/// Render the tool preamble shown to the model, V4 flavor.
///
/// Appends the fixed tool-usage instructions, then one serialized schema per
/// tool (newline-separated), then a closing reminder. Fails only if a schema
/// cannot be serialized.
fn render_tools(out: &mut String, tools: &[ChatTool]) -> Result<()> {
// The raw string below is prompt text and is emitted byte-for-byte.
out.push_str(
r#"## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<DSMLtool_calls>" block like the following:
<DSMLtool_calls>
<DSMLinvoke name="$TOOL_NAME">
<DSMLparameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</DSMLparameter>
...
</DSMLinvoke>
<DSMLinvoke name="$TOOL_NAME2">
...
</DSMLinvoke>
</DSMLtool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
"#,
);
// One schema per line: the newline goes between schemas, not after the last.
for (index, tool) in tools.iter().enumerate() {
if index > 0 {
out.push('\n');
}
render_tool_schema(out, tool)?;
}
out.push_str(
"\n\nYou MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.\n",
);
Ok(())
}
/// Append the JSON form of one tool (name, description, parameters, strict)
/// to `out`.
fn render_tool_schema(out: &mut String, tool: &ChatTool) -> Result<()> {
    let schema = RenderedToolSchema {
        name: &tool.name,
        description: tool.description.as_deref(),
        parameters: &tool.parameters,
        strict: tool.strict,
    };
    let serialized = json_dumps(&schema)?;
    out.push_str(&serialized);
    Ok(())
}
/// Append a system turn: its content (if any), then a blank line and the V4
/// tool preamble when `tools` is non-empty.
fn render_system_message(
    out: &mut String,
    content: Option<&ChatContent>,
    tools: &[ChatTool],
) -> Result<()> {
    if let Some(content) = content {
        write_chat_content(out, content)?;
    }
    if tools.is_empty() {
        return Ok(());
    }
    out.push_str("\n\n");
    render_tools(out, tools)
}
/// Developer messages are rendered as user-like turns with optional tools.
fn render_developer_message(
out: &mut String,
content: &ChatContent,
tools: &[ChatTool],
) -> Result<()> {
if content.is_empty() {
return Err(Error::ChatTemplate(
"invalid DeepSeek V4 developer message: empty content".to_string(),
));
}
out.push_str(USER_SP_TOKEN);
write_chat_content(out, content)?;
if !tools.is_empty() {
out.push_str("\n\n");
render_tools(out, tools)?;
}
Ok(())
}
/// Append the user marker followed by the message content.
fn render_user_message(out: &mut String, content: &ChatContent) -> Result<()> {
    out.push_str(USER_SP_TOKEN);
    write_chat_content(out, content)
}
/// Append the whole tool-response run containing `message_index` as a single
/// user turn, with results ordered by `sorted_tool_response_indices` and
/// separated by blank lines.
fn render_tool_response_block(
    out: &mut String,
    messages: &[ChatMessage],
    message_index: usize,
) -> Result<()> {
    let (block_start, block_end) = tool_response_block_bounds(messages, message_index);
    let ordered = sorted_tool_response_indices(messages, block_start, block_end);

    out.push_str(USER_SP_TOKEN);
    let mut needs_separator = false;
    for index in ordered {
        if needs_separator {
            out.push_str("\n\n");
        }
        needs_separator = true;
        match &messages[index] {
            ChatMessage::ToolResponse { content, .. } => write_tool_result(out, content)?,
            _ => unreachable!("tool response block should only contain tool messages"),
        }
    }
    Ok(())
}
/// Half-open `(start, end)` range of the run of consecutive tool responses
/// around `actual_index`.
fn tool_response_block_bounds(messages: &[ChatMessage], actual_index: usize) -> (usize, usize) {
    let is_tool_response =
        |index: usize| matches!(messages[index], ChatMessage::ToolResponse { .. });

    // Extend backwards while the preceding message is a tool response.
    let mut block_start = actual_index;
    loop {
        if block_start == 0 || !is_tool_response(block_start - 1) {
            break;
        }
        block_start -= 1;
    }

    // Extend forwards to one past the last tool response.
    let mut block_end = actual_index + 1;
    loop {
        if block_end >= messages.len() || !is_tool_response(block_end) {
            break;
        }
        block_end += 1;
    }

    (block_start, block_end)
}
/// Message indices of the tool-response run `block_start..block_end`, ordered
/// by the position of each response's `tool_call_id` in the most recent
/// assistant tool-call list.
///
/// Ids missing from that list get key `0`. The sort is stable, so ties keep
/// message order. If no earlier assistant turn has tool calls, message order
/// is returned unchanged.
fn sorted_tool_response_indices(
    messages: &[ChatMessage],
    block_start: usize,
    block_end: usize,
) -> Vec<usize> {
    let mut indices: Vec<usize> = (block_start..block_end).collect();
    if let Some(tool_call_order) = last_tool_call_order_before(messages, block_start) {
        indices.sort_by_key(|&index| match &messages[index] {
            ChatMessage::ToolResponse { tool_call_id, .. } => tool_call_order
                .get(tool_call_id.as_str())
                .copied()
                .unwrap_or(0),
            _ => unreachable!("tool response block should only contain tool messages"),
        });
    }
    indices
}
/// Map each tool-call id of the most recent assistant turn before
/// `message_index` that has tool calls to its position within that turn.
///
/// Returns `None` when no earlier assistant turn has tool calls. If an id is
/// repeated within the turn, its last position is kept.
fn last_tool_call_order_before(
    messages: &[ChatMessage],
    message_index: usize,
) -> Option<HashMap<&str, usize>> {
    // Search backwards and stop at the first match, rather than building and
    // discarding a map for every earlier assistant turn.
    messages[..message_index].iter().rev().find_map(|message| {
        let ChatMessage::Assistant { content } = message else {
            return None;
        };
        let order: HashMap<&str, usize> = content
            .tool_calls()
            .enumerate()
            .map(|(index, tool_call)| (tool_call.id.as_str(), index))
            .collect();
        (!order.is_empty()).then_some(order)
    })
}
/// Append `content` wrapped in `<tool_result>` … `</tool_result>`.
///
/// If writing the content fails, the closing tag is not written.
fn write_tool_result(out: &mut String, content: &ChatContent) -> Result<()> {
    out.push_str("<tool_result>");
    write_chat_content(out, content).map(|()| out.push_str("</tool_result>"))
}
/// Append the assistant marker followed by either the thinking-start or the
/// thinking-end token.
///
/// The start token is written only in thinking mode, and then only if
/// reasoning is not being dropped or `opens_thinking` is set.
fn write_assistant_transition(
    out: &mut String,
    thinking_mode: ThinkingMode,
    drop_thinking: bool,
    opens_thinking: bool,
) {
    let thinking_active = thinking_mode == ThinkingMode::Thinking;
    let marker = if thinking_active && (opens_thinking || !drop_thinking) {
        THINKING_START_TOKEN
    } else {
        THINKING_END_TOKEN
    };
    out.push_str(ASSISTANT_SP_TOKEN);
    out.push_str(marker);
}
/// Append one assistant turn: the optional thinking block (reasoning plus the
/// thinking-end token), the visible text, the tool-call block if there are
/// tool calls, and the EOS token when `append_eos` is set.
fn render_assistant_message(
    out: &mut String,
    emit_thinking_block: bool,
    append_eos: bool,
    content: &[AssistantContentBlock],
) -> Result<()> {
    let has_tool_calls = content.has_tool_calls();

    if emit_thinking_block {
        if content.has_reasoning() {
            write_assistant_reasoning(out, content);
        }
        // The closing token is written even when there is no reasoning text.
        out.push_str(THINKING_END_TOKEN);
    }

    write_assistant_text(out, content);

    if has_tool_calls {
        out.push_str("\n\n<DSMLtool_calls>\n");
        let mut tool_calls = content.tool_calls();
        if let Some(first) = tool_calls.next() {
            render_tool_call(out, first)?;
        }
        for tool_call in tool_calls {
            out.push('\n');
            render_tool_call(out, tool_call)?;
        }
        out.push_str("\n</DSMLtool_calls>");
    }

    if append_eos {
        out.push_str(EOS_TOKEN);
    }
    Ok(())
}
/// Append one tool call as an `invoke` element whose body is the call's
/// parameters, one per line.
fn render_tool_call(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
    // Opening tag, followed by a newline.
    out.push('<');
    out.push_str(DSML_TOKEN);
    out.push_str("invoke name=\"");
    out.push_str(&tool_call.name);
    out.push_str("\">\n");

    encode_arguments_to_dsml(out, tool_call)?;

    // Closing tag on its own line.
    out.push_str("\n</");
    out.push_str(DSML_TOKEN);
    out.push_str("invoke>");
    Ok(())
}
/// Convert one assistant tool-call arguments object into DSML parameter form.
///
/// String values are emitted raw with `string="true"`, while all other JSON
/// values are rendered with JSON syntax and `string="false"`.
fn encode_arguments_to_dsml(out: &mut String, tool_call: &AssistantToolCall) -> Result<()> {
let arguments: Value = serde_json::from_str(&tool_call.arguments).map_err(|error| {
Error::ChatTemplate(format!(
"assistant tool call has invalid JSON arguments for DeepSeek V4: {error}"
))
})?;
let Some(arguments) = arguments.as_object() else {
return Err(Error::ChatTemplate(
"assistant tool call arguments for DeepSeek V4 must be a JSON object".to_string(),
));
};
let mut wrote_parameter = false;
for (key, value) in arguments {
if wrote_parameter {
out.push('\n');
}
let is_string = matches!(value, Value::String(_));
write!(
out,
"<{DSML_TOKEN}parameter name=\"{key}\" string=\"{}\">",
if is_string { "true" } else { "false" }
)
.expect("writing to String cannot fail");
match value {
Value::String(value) => out.push_str(value),
value => out.push_str(&json_dumps(value)?),
}
write!(out, "</{DSML_TOKEN}parameter>").expect("writing to String cannot fail");
wrote_parameter = true;
}
Ok(())
}
/// Append chat content to `out` without building an intermediate `String`.
///
/// Plain text is copied as is. Multi-part content is concatenated part by
/// part, and the first part that cannot be read as text aborts with its
/// error.
fn write_chat_content(out: &mut String, content: &ChatContent) -> Result<()> {
    let parts = match content {
        ChatContent::Text(text) => {
            out.push_str(text);
            return Ok(());
        }
        ChatContent::Parts(parts) => parts,
    };
    for part in parts {
        let text = part.as_text()?;
        out.push_str(text);
    }
    Ok(())
}
/// Append the text of every reasoning block, in the order the blocks appear.
fn write_assistant_reasoning(out: &mut String, content: &[AssistantContentBlock]) {
    content
        .iter()
        .filter_map(|block| match block {
            AssistantContentBlock::Reasoning { text } => Some(text.as_str()),
            _ => None,
        })
        .for_each(|reasoning| out.push_str(reasoning));
}
/// Append the text of every visible text block, in the order the blocks
/// appear.
fn write_assistant_text(out: &mut String, content: &[AssistantContentBlock]) {
    content
        .iter()
        .filter_map(|block| match block {
            AssistantContentBlock::Text { text } => Some(text.as_str()),
            _ => None,
        })
        .for_each(|visible| out.push_str(visible));
}
/// Compact JSON serialization used by this renderer for exact prompt text.
fn json_dumps<T: Serialize>(value: &T) -> Result<String> {
JsonFormat::new()
.comma(", ")
.expect("literal comma separator is valid JSON")
.colon(": ")
.expect("literal colon separator is valid JSON")
.ascii(false)
.format_to_string(value)
.map_err(|error| {
Error::ChatTemplate(format!(
"failed to serialize DeepSeek V4 JSON payload: {error}"
))
})
}
@@ -0,0 +1,81 @@
{
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather for a specific location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name"
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "Temperature unit"
}
},
"required": ["location"]
}
}
},
{
"type": "function",
"function": {
"name": "search",
"description": "Search the web for information",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query"
},
"num_results": {
"type": "integer",
"description": "Number of results to return"
}
},
"required": ["query"]
}
}
}
],
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What's the weather in Beijing?"
},
{
"role": "assistant",
"reasoning_content": "The user wants to know the weather in Beijing. I should use the get_weather tool.",
"tool_calls": [
{
"id": "call_001",
"type": "function",
"function": {
"name": "get_weather",
"arguments": "{\"location\": \"Beijing\", \"unit\": \"celsius\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_001",
"content": "{\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}"
},
{
"role": "assistant",
"reasoning_content": "Got the weather data. Let me format a nice response.",
"content": "The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity."
}
]
}
@@ -0,0 +1,24 @@
[
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello"
},
{
"role": "assistant",
"reasoning_content": "The user said hello, I should greet back.",
"content": "Hi there! How can I help you?"
},
{
"role": "user",
"content": "What is the capital of France?"
},
{
"role": "assistant",
"reasoning_content": "The user asks about the capital of France. It is Paris.",
"content": "The capital of France is Paris."
}
]
@@ -0,0 +1,36 @@
<begin▁of▁sentence>You are a helpful assistant.
## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<DSMLtool_calls>" block like the following:
<DSMLtool_calls>
<DSMLinvoke name="$TOOL_NAME">
<DSMLparameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</DSMLparameter>
...
</DSMLinvoke>
<DSMLinvoke name="$TOOL_NAME2">
...
</DSMLinvoke>
</DSMLtool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
{"name": "get_weather", "description": "Get the weather for a specific location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}}, "required": ["location"]}}
{"name": "search", "description": "Search the web for information", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}, "num_results": {"type": "integer", "description": "Number of results to return"}}, "required": ["query"]}}
You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
<User>What's the weather in Beijing?<Assistant><think>The user wants to know the weather in Beijing. I should use the get_weather tool.</think>
<DSMLtool_calls>
<DSMLinvoke name="get_weather">
<DSMLparameter name="location" string="true">Beijing</DSMLparameter>
<DSMLparameter name="unit" string="true">celsius</DSMLparameter>
</DSMLinvoke>
</DSMLtool_calls><end▁of▁sentence><User><tool_result>{"temperature": 22, "condition": "sunny", "humidity": 45}</tool_result><Assistant><think>Got the weather data. Let me format a nice response.</think>The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity.<end▁of▁sentence>
@@ -0,0 +1 @@
<begin▁of▁sentence>You are a helpful assistant.<User>Hello<Assistant></think>Hi there! How can I help you?<end▁of▁sentence><User>What is the capital of France?<Assistant><think>The user asks about the capital of France. It is Paris.</think>The capital of France is Paris.<end▁of▁sentence>
@@ -0,0 +1,30 @@
mod encoding;
use vllm_text::Prompt;
use super::{ChatRenderer, RenderedPrompt};
use crate::Result;
use crate::request::ChatRequest;
/// Dedicated DeepSeek V4 renderer.
///
/// A stateless unit type; the prompt construction lives in the `encoding`
/// submodule.
#[derive(Debug, Clone, Copy, Default)]
pub struct DeepSeekV4ChatRenderer;
impl DeepSeekV4ChatRenderer {
pub fn new() -> Self {
Self
}
}
impl ChatRenderer for DeepSeekV4ChatRenderer {
    /// Validates `request`, then renders it into a text prompt.
    ///
    /// Validation and rendering errors are returned unchanged.
    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
        request.validate()?;
        let text = encoding::render_request(request)?;
        let prompt = Prompt::Text(text);
        Ok(RenderedPrompt { prompt })
    }
}
#[cfg(test)]
mod tests;
@@ -0,0 +1,369 @@
use std::fs;
use std::path::PathBuf;
use expect_test::{ExpectFile, expect, expect_file};
use serde::Deserialize;
use serde_json::Value;
use super::DeepSeekV4ChatRenderer;
use crate::event::{AssistantContentBlock, AssistantToolCall};
use crate::request::{
ChatMessage, ChatRequest, ChatTool, ChatToolChoice, GenerationPromptMode, ReasoningEffort,
};
use crate::{ChatRenderer, ChatRole};
/// Top-level shape of a fixture input file.
///
/// Untagged: the JSON is accepted either as an object with `messages` (and
/// optional `tools`) or as a bare array of messages.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum FixtureFile {
// Object form: `{ "tools": [...], "messages": [...] }`.
WithTools(FixtureRequest),
// Array form: `[ {message}, ... ]`, with no request-level tools.
MessagesOnly(Vec<FixtureMessage>),
}
/// Object form of a fixture: request-level tools plus the conversation.
#[derive(Debug, Deserialize)]
struct FixtureRequest {
// Defaults to an empty list when the key is missing.
#[serde(default)]
tools: Vec<FixtureTool>,
messages: Vec<FixtureMessage>,
}
impl FixtureFile {
fn into_parts(self) -> (Vec<FixtureTool>, Vec<FixtureMessage>) {
match self {
Self::WithTools(req) => (req.tools, req.messages),
Self::MessagesOnly(messages) => (Vec::new(), messages),
}
}
}
/// One tool declaration in a fixture; only its `function` object is read.
#[derive(Debug, Deserialize)]
struct FixtureTool {
function: FixtureToolFunction,
}
/// Function definition of a fixture tool; `to_chat_tools` copies these fields
/// into a `ChatTool`.
#[derive(Debug, Deserialize)]
struct FixtureToolFunction {
name: String,
description: Option<String>,
// Parameter schema kept as raw JSON.
parameters: Value,
#[serde(default)]
strict: Option<bool>,
}
/// One conversation message in a fixture, selected by its `role` field
/// (`system`, `developer`, `user`, `assistant`, `tool`).
#[derive(Debug, Deserialize)]
#[serde(tag = "role", rename_all = "snake_case")]
enum FixtureMessage {
System {
content: String,
},
Developer {
content: String,
// Optional per-message tools; an empty list is converted to `None`.
#[serde(default)]
tools: Vec<FixtureTool>,
},
User {
content: String,
},
// Every field is optional; empty strings produce no content block.
Assistant {
#[serde(default)]
content: String,
#[serde(default)]
reasoning_content: String,
#[serde(default)]
tool_calls: Vec<FixtureToolCall>,
},
Tool {
content: String,
// When absent, `fixture_request` synthesizes an id from the message index.
#[serde(default)]
tool_call_id: Option<String>,
},
}
/// One entry of an assistant message's `tool_calls` array in a fixture file.
#[derive(Debug, Deserialize)]
struct FixtureToolCall {
// Optional in fixtures; `fixture_request` synthesizes an id when it is absent.
#[serde(default)]
id: Option<String>,
// Name and raw argument string of the called function.
function: FixtureToolCallFunction,
}
/// The `function` object nested inside a fixture tool call.
#[derive(Debug, Deserialize)]
struct FixtureToolCallFunction {
// Function name, copied into `AssistantToolCall::name`.
name: String,
// Argument payload kept as a string; copied into `AssistantToolCall::arguments` unchanged.
arguments: String,
}
/// Renders `request` with the DeepSeek V4 renderer and returns the prompt
/// text, panicking if rendering fails or the prompt is not text.
fn render_request(request: &ChatRequest) -> String {
    let rendered = DeepSeekV4ChatRenderer::new().render(request).unwrap();
    let prompt = rendered.prompt;
    prompt
        .into_text()
        .expect("deepseek v4 renderer should return text prompt")
}
/// Builds the `ChatRequest` described by the fixture file `input_name`.
///
/// Missing tool-call ids and tool-response ids are replaced with generated,
/// index-based ids. `tool_choice` is `Auto` when the fixture declares
/// top-level tools and `None` otherwise. The `thinking` template kwarg is
/// always set to `true`, and the generation prompt is disabled when the last
/// message is an assistant message.
///
/// Panics when the fixture cannot be read or parsed.
fn fixture_request(input_name: &str) -> ChatRequest {
    let raw = fs::read_to_string(fixture_path(input_name)).unwrap();
    let parsed: FixtureFile = serde_json::from_str(&raw).unwrap();
    let (fixture_tools, fixture_messages) = parsed.into_parts();

    let mut messages = Vec::new();
    for (index, message) in fixture_messages.into_iter().enumerate() {
        let converted = match message {
            FixtureMessage::System { content } => ChatMessage::system(content),
            FixtureMessage::Developer { content, tools } => {
                let local_tools = if tools.is_empty() {
                    None
                } else {
                    Some(to_chat_tools(&tools))
                };
                ChatMessage::developer(content, local_tools)
            }
            FixtureMessage::User { content } => ChatMessage::user(content),
            FixtureMessage::Assistant {
                content,
                reasoning_content,
                tool_calls,
            } => {
                // Block order is fixed: reasoning, then text, then tool calls.
                let reasoning = (!reasoning_content.is_empty()).then(|| {
                    AssistantContentBlock::Reasoning {
                        text: reasoning_content,
                    }
                });
                let text =
                    (!content.is_empty()).then(|| AssistantContentBlock::Text { text: content });
                let calls = tool_calls
                    .into_iter()
                    .enumerate()
                    .map(|(tool_index, call)| {
                        let id = match call.id {
                            Some(id) => id,
                            None => format!("fixture-tool-call-{index}-{tool_index}"),
                        };
                        AssistantContentBlock::ToolCall(AssistantToolCall {
                            id,
                            name: call.function.name,
                            arguments: call.function.arguments,
                        })
                    });
                let blocks: Vec<_> = reasoning.into_iter().chain(text).chain(calls).collect();
                ChatMessage::assistant_blocks(blocks)
            }
            FixtureMessage::Tool {
                content,
                tool_call_id,
            } => {
                let id = match tool_call_id {
                    Some(id) => id,
                    None => format!("fixture-tool-response-{index}"),
                };
                ChatMessage::tool_response(content, id)
            }
        };
        messages.push(converted);
    }

    let tool_choice = if fixture_tools.is_empty() {
        ChatToolChoice::None
    } else {
        ChatToolChoice::Auto
    };
    let mut request = ChatRequest {
        request_id: "deepseek-v4-fixture".to_string(),
        messages,
        tools: to_chat_tools(&fixture_tools),
        tool_choice,
        ..ChatRequest::for_test()
    };

    let last_role = request.messages.last().map(ChatMessage::role);
    if matches!(last_role, Some(ChatRole::Assistant)) {
        request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
    }
    request
        .chat_options
        .template_kwargs
        .insert("thinking".to_string(), Value::Bool(true));
    request
}
fn to_chat_tools(tools: &[FixtureTool]) -> Vec<ChatTool> {
tools
.iter()
.map(|tool| ChatTool {
name: tool.function.name.clone(),
description: tool.function.description.clone(),
parameters: tool.function.parameters.clone(),
strict: tool.function.strict,
})
.collect()
}
/// Returns the path of fixture `name` inside
/// `<CARGO_MANIFEST_DIR>/src/renderer/deepseek_v4/fixtures`.
fn fixture_path(name: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("src/renderer/deepseek_v4");
    path.push("fixtures");
    path.push(name);
    path
}
/// Renders the fixture `input_name` and compares the prompt with the
/// `expected` snapshot file.
fn assert_fixture(input_name: &str, expected: ExpectFile) {
    let rendered = render_request(&fixture_request(input_name));
    expected.assert_eq(&rendered);
}
/// Snapshot test: `test_input_1.json` must render to `fixtures/test_output_1.txt`.
#[test]
fn renders_v4_fixture_1_tool_call_round_trip() {
    let expected = expect_file!["fixtures/test_output_1.txt"];
    assert_fixture("test_input_1.json", expected);
}
/// Snapshot test: `test_input_2.json` must render to `fixtures/test_output_2.txt`.
#[test]
fn renders_v4_fixture_2_multi_turn_drop_thinking() {
    let expected = expect_file!["fixtures/test_output_2.txt"];
    assert_fixture("test_input_2.json", expected);
}
// With the `thinking` template kwarg set to true and
// `reasoning_effort = Some(ReasoningEffort::Max)`, the rendered prompt must
// match the inline snapshot, which starts with the maximum-effort
// instruction block before the user turn.
// NOTE(review): the BOS marker in this snapshot is spelled
// `<beginofsentence>`, while the single-line snapshots in this file spell it
// `<begin▁of▁sentence>` — confirm the snapshot against actual renderer output.
#[test]
fn reasoning_effort_max_adds_prefix_when_thinking_is_enabled() {
let mut request = ChatRequest {
messages: vec![ChatMessage::user("solve it")],
..ChatRequest::for_test()
};
request
.chat_options
.template_kwargs
.insert("thinking".to_string(), Value::Bool(true));
request.chat_options.reasoning_effort = Some(ReasoningEffort::Max);
let rendered = render_request(&request);
expect![[r#"
<beginofsentence>Reasoning Effort: Absolute maximum with no shortcuts permitted.
You MUST be very thorough in your thinking and comprehensively decompose the problem to resolve the root cause, rigorously stress-testing your logic against all potential paths, edge cases, and adversarial scenarios.
Explicitly write out your entire deliberation process, documenting every intermediate step, considered alternative, and rejected hypothesis to ensure absolutely no assumption is left unchecked.
<User>solve it<Assistant><think>"#]]
.assert_eq(&rendered);
}
/// `ReasoningEffort::None` must win over the `thinking = true` template kwarg:
/// the expected prompt ends with `</think>` instead of `<think>`.
#[test]
fn reasoning_effort_none_disables_thinking() {
    let mut request = ChatRequest {
        messages: vec![ChatMessage::user("answer directly")],
        ..ChatRequest::for_test()
    };
    let options = &mut request.chat_options;
    options
        .template_kwargs
        .insert("thinking".to_string(), Value::Bool(true));
    options.reasoning_effort = Some(ReasoningEffort::None);

    let rendered = render_request(&request);
    let expected = expect!["<begin▁of▁sentence><User>answer directly<Assistant></think>"];
    expected.assert_eq(&rendered);
}
/// A `reasoning_effort` entry passed through `template_kwargs` must not add
/// the effort prefix; the expected prompt is the plain thinking prompt.
#[test]
fn reasoning_effort_template_kwarg_is_ignored() {
    let mut request = ChatRequest {
        messages: vec![ChatMessage::user("solve it")],
        ..ChatRequest::for_test()
    };
    let kwargs = &mut request.chat_options.template_kwargs;
    kwargs.insert("thinking".to_string(), Value::Bool(true));
    kwargs.insert(
        "reasoning_effort".to_string(),
        Value::String("max".to_string()),
    );

    let rendered = render_request(&request);
    let expected = expect!["<begin▁of▁sentence><User>solve it<Assistant><think>"];
    expected.assert_eq(&rendered);
}
// The assistant issues the tool calls in the order `second`, `first`, while
// the tool responses arrive as `first`, `second`. The snapshot expects the
// tool results to be emitted in the assistant's call order (second result
// before first result).
// NOTE(review): the special markers in this snapshot (`<beginofsentence>`,
// `<DSMLtool_calls>`, …) are spelled differently from the
// `<begin▁of▁sentence>` form used by the single-line snapshots in this file —
// confirm the snapshot against actual renderer output.
#[test]
fn tool_results_are_sorted_by_previous_assistant_tool_call_order() {
let request = ChatRequest {
messages: vec![
ChatMessage::assistant_blocks(vec![
AssistantContentBlock::ToolCall(AssistantToolCall {
id: "second".to_string(),
name: "second_tool".to_string(),
arguments: "{}".to_string(),
}),
AssistantContentBlock::ToolCall(AssistantToolCall {
id: "first".to_string(),
name: "first_tool".to_string(),
arguments: "{}".to_string(),
}),
]),
ChatMessage::tool_response("first result", "first"),
ChatMessage::tool_response("second result", "second"),
],
..ChatRequest::for_test()
};
let rendered = render_request(&request);
expect![[r#"
<beginofsentence>
<DSMLtool_calls>
<DSMLinvoke name="second_tool">
</DSMLinvoke>
<DSMLinvoke name="first_tool">
</DSMLinvoke>
</DSMLtool_calls><endofsentence><User><tool_result>second result</tool_result>
<tool_result>first result</tool_result><Assistant></think>"#]]
.assert_eq(&rendered);
}
/// With `drop_thinking = false`, the reasoning of an earlier assistant turn
/// must stay in the rendered prompt.
#[test]
fn drop_thinking_false_keeps_prior_assistant_reasoning() {
    let earlier_turn = ChatMessage::assistant_blocks(vec![
        AssistantContentBlock::Reasoning {
            text: "old reasoning".to_string(),
        },
        AssistantContentBlock::Text {
            text: "old answer".to_string(),
        },
    ]);
    let mut request = ChatRequest {
        messages: vec![earlier_turn, ChatMessage::user("next")],
        ..ChatRequest::for_test()
    };
    let kwargs = &mut request.chat_options.template_kwargs;
    kwargs.insert("thinking".to_string(), Value::Bool(true));
    kwargs.insert("drop_thinking".to_string(), Value::Bool(false));

    let rendered = render_request(&request);
    expect![
        "<begin▁of▁sentence>old reasoning</think>old answer<end▁of▁sentence><User>next<Assistant><think>"
    ]
    .assert_eq(&rendered);
}
/// In `ContinueFinalAssistant` mode the prompt must end with the partial
/// assistant text, with nothing appended after it.
#[test]
fn continue_final_assistant_omits_final_eos() {
    let chat_options = crate::request::ChatOptions {
        generation_prompt_mode: GenerationPromptMode::ContinueFinalAssistant,
        ..Default::default()
    };
    let request = ChatRequest {
        messages: vec![
            ChatMessage::user("write"),
            ChatMessage::assistant_text("partial answer"),
        ],
        chat_options,
        ..ChatRequest::for_test()
    };

    let rendered = render_request(&request);
    let expected = expect!["<begin▁of▁sentence><User>write<Assistant></think>partial answer"];
    expected.assert_eq(&rendered);
}
+15
View File
@@ -0,0 +1,15 @@
use thiserror::Error as ThisError;
/// Errors raised while loading, resolving, or rendering a chat template.
///
/// The `#[error(...)]` strings are the display messages; the wrapped errors
/// are exposed as the error source.
#[derive(Debug, ThisError)]
pub(crate) enum TemplateError {
// Jinja failure; `#[from]` also provides the conversion from
// `minijinja::Error`.
#[error("failed to render jinja template")]
Jinja(#[from] minijinja::Error),
// I/O failure while reading a chat template file.
#[error("failed to read chat template file")]
ReadTemplateFile(#[source] std::io::Error),
// The configured template string looked like a path, but no such file exists.
#[error("chat template looks like a file path but does not exist")]
MissingTemplatePath,
// `chat_template.json` is not valid JSON.
#[error("failed to parse chat_template.json")]
ParseTemplateJson(#[source] serde_json::Error),
// `chat_template.json` parsed, but holds no usable template.
#[error("chat_template.json does not contain a valid template")]
InvalidTemplateJson,
}
+400
View File
@@ -0,0 +1,400 @@
use std::collections::{HashSet, VecDeque};
use std::fmt;
use std::str::FromStr;
use minijinja::machinery::ast::{Expr, ForLoop, Set, Stmt};
use minijinja::machinery::{WhitespaceConfig, parse};
use minijinja::syntax::SyntaxConfig;
use serde_with::DeserializeFromStr;
/// Concrete shape in which message content is handed to a chat template.
///
/// Defaults to [`ChatTemplateContentFormat::String`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ChatTemplateContentFormat {
/// Content is a simple string.
#[default]
String,
/// Content is a list of structured parts (OpenAI format).
OpenAi,
}
/// Configurable chat-template content format selection.
///
/// Deserialization goes through the `FromStr` implementation
/// (`DeserializeFromStr`). Defaults to [`ChatTemplateContentFormatOption::Auto`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, DeserializeFromStr)]
pub enum ChatTemplateContentFormatOption {
/// Detect the format from the template source.
#[default]
Auto,
/// Always flatten content into plain strings before rendering.
String,
/// Always pass content through in OpenAI-compatible structured form.
OpenAi,
}
/// Textual spellings of the options, shared by the `FromStr` and `Display`
/// implementations.
impl ChatTemplateContentFormatOption {
/// Spelling of [`ChatTemplateContentFormatOption::Auto`].
pub const AUTO_LITERAL: &str = "auto";
/// Spelling of [`ChatTemplateContentFormatOption::OpenAi`].
pub const OPENAI_LITERAL: &str = "openai";
/// Spelling of [`ChatTemplateContentFormatOption::String`].
pub const STRING_LITERAL: &str = "string";
}
impl FromStr for ChatTemplateContentFormatOption {
    type Err = String;

    /// Parses one of the option literals, ignoring ASCII case.
    ///
    /// Returns an error message listing the accepted literals for any other
    /// input.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let candidates = [
            (Self::AUTO_LITERAL, Self::Auto),
            (Self::STRING_LITERAL, Self::String),
            (Self::OPENAI_LITERAL, Self::OpenAi),
        ];
        for (literal, option) in candidates {
            if value.eq_ignore_ascii_case(literal) {
                return Ok(option);
            }
        }
        Err(format!(
            "invalid content format `{value}`; expected one of: {}, {}, {}",
            Self::AUTO_LITERAL,
            Self::STRING_LITERAL,
            Self::OPENAI_LITERAL
        ))
    }
}
impl fmt::Display for ChatTemplateContentFormatOption {
    /// Writes the literal accepted by `from_str` for this option.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let literal = match self {
            Self::Auto => Self::AUTO_LITERAL,
            Self::String => Self::STRING_LITERAL,
            Self::OpenAi => Self::OPENAI_LITERAL,
        };
        f.write_str(literal)
    }
}
/// Returns `true` when `expr` is a plain reference to the variable `varname`.
fn is_var_access(expr: &Expr, varname: &str) -> bool {
    match expr {
        Expr::Var(var) => var.id == varname,
        _ => false,
    }
}
/// Returns `true` when `expr` is a constant whose string value equals `value`.
fn is_const_str(expr: &Expr, value: &str) -> bool {
    if let Expr::Const(constant) = expr {
        constant.value.as_str() == Some(value)
    } else {
        false
    }
}
/// Returns `true` when `expr` reads `key` from the variable `varname`, either
/// as a subscript with a constant string (`varname['key']`) or as an
/// attribute (`varname.key`).
fn is_attr_access(expr: &Expr, varname: &str, key: &str) -> bool {
    if let Expr::GetItem(item) = expr {
        return is_var_access(&item.expr, varname) && is_const_str(&item.subscript_expr, key);
    }
    if let Expr::GetAttr(attr) = expr {
        return is_var_access(&attr.expr, varname) && attr.name == key;
    }
    false
}
/// Returns `true` when `expr` refers to `varname` (or to `varname.key` when
/// `key` is given), looking through any wrapping filter, test, or slice
/// expressions.
fn is_var_or_elems_access(expr: &Expr, varname: &str, key: Option<&str>) -> bool {
    let mut current = expr;
    loop {
        current = match current {
            // A filter without an inner expression can never match.
            Expr::Filter(filter) => match filter.expr.as_ref() {
                Some(inner) => inner,
                None => return false,
            },
            Expr::Test(test) => &test.expr,
            Expr::Slice(slice) => &slice.expr,
            leaf => {
                return match key {
                    Some(key) => is_attr_access(leaf, varname, key),
                    None => is_var_access(leaf, varname),
                };
            }
        };
    }
}
/// Walks `stmt` depth-first, appending every `set` assignment to
/// `assignments` and every `for` loop to `loops`.
///
/// A loop is recorded before its body and else-body are visited. Statement
/// kinds that are not listed in the match are not descended into.
fn visit_stmt<'a>(
    stmt: &'a Stmt<'a>,
    assignments: &mut Vec<&'a Set<'a>>,
    loops: &mut Vec<&'a ForLoop<'a>>,
) {
    // Visits every statement of one nested body in order.
    fn visit_body<'a>(
        body: &'a [Stmt<'a>],
        assignments: &mut Vec<&'a Set<'a>>,
        loops: &mut Vec<&'a ForLoop<'a>>,
    ) {
        for child in body {
            visit_stmt(child, assignments, loops);
        }
    }

    match stmt {
        Stmt::Template(template) => visit_body(&template.children, assignments, loops),
        Stmt::ForLoop(for_loop) => {
            loops.push(for_loop);
            visit_body(&for_loop.body, assignments, loops);
            visit_body(&for_loop.else_body, assignments, loops);
        }
        Stmt::IfCond(if_cond) => {
            visit_body(&if_cond.true_body, assignments, loops);
            visit_body(&if_cond.false_body, assignments, loops);
        }
        Stmt::WithBlock(with_block) => visit_body(&with_block.body, assignments, loops),
        Stmt::Set(set_stmt) => assignments.push(set_stmt),
        Stmt::SetBlock(set_block) => visit_body(&set_block.body, assignments, loops),
        Stmt::AutoEscape(auto_escape) => visit_body(&auto_escape.body, assignments, loops),
        Stmt::FilterBlock(filter_block) => visit_body(&filter_block.body, assignments, loops),
        Stmt::Block(block) => visit_body(&block.body, assignments, loops),
        Stmt::Macro(macro_stmt) => visit_body(&macro_stmt.body, assignments, loops),
        Stmt::CallBlock(call_block) => {
            visit_body(&call_block.macro_decl.body, assignments, loops)
        }
        _ => {}
    }
}
/// Collects all `set` assignments and `for` loops reachable from `root`, in
/// the order `visit_stmt` encounters them.
fn collect_assignments_and_loops<'a>(
    root: &'a Stmt<'a>,
) -> (Vec<&'a Set<'a>>, Vec<&'a ForLoop<'a>>) {
    let (mut assignments, mut loops) = (Vec::new(), Vec::new());
    visit_stmt(root, &mut assignments, &mut loops);
    (assignments, loops)
}
/// Returns `varname` followed by every variable that is (transitively)
/// assigned from it via `{% set alias = ... %}`.
///
/// Names are reported once each, in discovery order. Only assignments whose
/// target is a plain variable are considered.
fn iter_nodes_assign_var_or_elems(root: &Stmt<'_>, varname: &str) -> Vec<String> {
    let (assignments, _) = collect_assignments_and_loops(root);
    let mut seen = HashSet::from([varname.to_string()]);
    let mut discovered = vec![varname.to_string()];
    let mut pending = VecDeque::from([varname.to_string()]);

    while let Some(current) = pending.pop_front() {
        // Assignment targets whose right-hand side refers to `current`.
        let aliases = assignments.iter().filter_map(|assign| match &assign.target {
            Expr::Var(lhs) if is_var_or_elems_access(&assign.expr, &current, None) => {
                Some(lhs.id.to_string())
            }
            _ => None,
        });
        for alias in aliases {
            if !seen.insert(alias.clone()) {
                continue;
            }
            discovered.push(alias.clone());
            if alias != current {
                pending.push_back(alias);
            }
        }
    }
    discovered
}
/// Returns the loop variables of every `for` loop that iterates over
/// `messages` or one of its aliases, de-duplicated in first-seen order.
///
/// Loops whose target is not a plain variable are skipped.
fn iter_nodes_assign_messages_item(root: &Stmt<'_>) -> Vec<String> {
    let message_varnames = iter_nodes_assign_var_or_elems(root, "messages");
    let (_, loops) = collect_assignments_and_loops(root);
    let mut seen = HashSet::new();

    loops
        .into_iter()
        .filter_map(|loop_ast| {
            let Expr::Var(target) = &loop_ast.target else {
                return None;
            };
            let iterates_messages = message_varnames
                .iter()
                .any(|varname| is_var_or_elems_access(&loop_ast.iter, varname, None));
            iterates_messages.then(|| target.id.to_string())
        })
        .filter(|name| seen.insert(name.clone()))
        .collect()
}
/// Returns `true` when some `for` loop with a plain-variable target iterates
/// over the `content` of a message loop variable.
fn has_content_item_loop(root: &Stmt<'_>) -> bool {
    let message_varnames = iter_nodes_assign_messages_item(root);
    let (_, loops) = collect_assignments_and_loops(root);

    for loop_ast in loops {
        if !matches!(loop_ast.target, Expr::Var(_)) {
            continue;
        }
        let iterates_content = message_varnames
            .iter()
            .any(|varname| is_var_or_elems_access(&loop_ast.iter, varname, Some("content")));
        if iterates_content {
            return true;
        }
    }
    false
}
/// Detect the content format expected by a Jinja2 chat template based on AST
/// analysis.
///
/// Returns [`ChatTemplateContentFormat::OpenAi`] when the template loops over
/// the `content` of a message, and [`ChatTemplateContentFormat::String`]
/// otherwise. A template that fails to parse is also reported as `String`.
pub fn detect_chat_template_content_format(template: &str) -> ChatTemplateContentFormat {
    let parsed = parse(
        template,
        "template",
        SyntaxConfig {},
        WhitespaceConfig::default(),
    );
    let Ok(ast) = parsed else {
        return ChatTemplateContentFormat::String;
    };
    match has_content_item_loop(&ast) {
        true => ChatTemplateContentFormat::OpenAi,
        false => ChatTemplateContentFormat::String,
    }
}
// Unit tests for chat-template content-format detection: hand-written
// templates plus a snapshot over the `.jinja` files in
// `tests/templates/vllm_examples`.
#[cfg(test)]
mod tests {
use std::fs;
use std::path::{Path, PathBuf};
use expect_test::expect;
use super::{ChatTemplateContentFormat, detect_chat_template_content_format};
// Shorthand for the function under test.
fn detect(template: &str) -> ChatTemplateContentFormat {
detect_chat_template_content_format(template)
}
// Canonical path of `<CARGO_MANIFEST_DIR>/tests/templates/vllm_examples`;
// panics when the directory does not exist.
fn vllm_examples_dir() -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests/templates/vllm_examples")
.canonicalize()
.expect("vLLM example template directory should exist locally")
}
// Reads one example template by its path relative to the examples directory;
// panics when the file cannot be read.
fn read_vllm_example(relative_path: &str) -> String {
fs::read_to_string(vllm_examples_dir().join(relative_path))
.unwrap_or_else(|_| panic!("failed to read vLLM example template: {relative_path}"))
}
// Lists the `.jinja` files directly inside the examples directory, sorted by
// path so that the snapshot below is deterministic.
fn iter_vllm_example_template_paths() -> impl Iterator<Item = PathBuf> {
let mut paths = fs::read_dir(vllm_examples_dir())
.expect("failed to read vLLM example template directory")
.map(|entry| entry.expect("failed to read vLLM example template dir entry").path())
.filter(|path| path.extension().is_some_and(|ext| ext == "jinja"))
.collect::<Vec<_>>();
paths.sort();
paths.into_iter()
}
// Printing `message.content` without looping over it is the string format.
#[test]
fn detects_string_template_without_content_loop() {
assert_eq!(
detect("{% for message in messages %}{{ message.content }}{% endfor %}"),
ChatTemplateContentFormat::String
);
}
// Looping over `message['content']` is the OpenAI format.
#[test]
fn detects_openai_template_with_direct_content_loop() {
assert_eq!(
detect(
"{% for message in messages %}{% for content in message['content'] %}{{ content }}{% endfor %}{% endfor %}"
),
ChatTemplateContentFormat::OpenAi
);
}
// An alias of `messages` created with `set` is followed.
#[test]
fn detects_openai_template_with_messages_alias() {
assert_eq!(
detect(
"{% set msgs = messages %}{% for message in msgs %}{% for content in message.content %}{{ content }}{% endfor %}{% endfor %}"
),
ChatTemplateContentFormat::OpenAi
);
}
// An alias of `message.content` itself is not followed, so looping over the
// alias is still reported as the string format.
#[test]
fn does_not_detect_content_alias_loop_as_openai() {
assert_eq!(
detect(
"{% for message in messages %}{% set parts = message.content %}{% for item in parts %}{{ item }}{% endfor %}{% endfor %}"
),
ChatTemplateContentFormat::String
);
}
// Filters and subscripts on `message.content` outside of a loop header do
// not count as iterating over content parts.
#[test]
fn does_not_treat_length_or_index_access_as_openai() {
assert_eq!(
detect("{% for message in messages %}{{ message.content|length }}{% endfor %}"),
ChatTemplateContentFormat::String
);
assert_eq!(
detect("{% for message in messages %}{{ message.content[0] }}{% endfor %}"),
ChatTemplateContentFormat::String
);
}
// Snapshot of the detected format for every example template, one
// `<file name> => <format>` line per file.
#[test]
fn matches_vllm_example_template_formats() {
let snapshot = iter_vllm_example_template_paths()
.map(|path| {
let file_name = path
.file_name()
.and_then(|name| name.to_str())
.expect("template file name should be valid UTF-8");
let template = read_vllm_example(file_name);
let format = detect(&template);
format!("{file_name:50} => {format:?}")
})
.collect::<Vec<_>>()
.join("\n");
expect![[r#"
template_alpaca.jinja => String
template_baichuan.jinja => String
template_chatglm.jinja => String
template_chatglm2.jinja => String
template_chatml.jinja => String
template_falcon.jinja => String
template_falcon_180b.jinja => String
template_inkbot.jinja => String
template_teleflm.jinja => String
tool_chat_template_deepseekr1.jinja => String
tool_chat_template_deepseekv3.jinja => String
tool_chat_template_deepseekv31.jinja => String
tool_chat_template_functiongemma.jinja => String
tool_chat_template_gemma3_pythonic.jinja => OpenAi
tool_chat_template_gemma4.jinja => OpenAi
tool_chat_template_glm4.jinja => String
tool_chat_template_granite.jinja => String
tool_chat_template_granite_20b_fc.jinja => String
tool_chat_template_hermes.jinja => String
tool_chat_template_hunyuan_a13b.jinja => String
tool_chat_template_internlm2_tool.jinja => String
tool_chat_template_llama3.1_json.jinja => OpenAi
tool_chat_template_llama3.2_json.jinja => OpenAi
tool_chat_template_llama3.2_pythonic.jinja => String
tool_chat_template_llama4_json.jinja => OpenAi
tool_chat_template_llama4_pythonic.jinja => OpenAi
tool_chat_template_minimax_m1.jinja => OpenAi
tool_chat_template_mistral.jinja => String
tool_chat_template_mistral3.jinja => OpenAi
tool_chat_template_mistral_parallel.jinja => String
tool_chat_template_phi4_mini.jinja => String
tool_chat_template_qwen3coder.jinja => String
tool_chat_template_toolace.jinja => String
tool_chat_template_xlam_llama.jinja => String
tool_chat_template_xlam_qwen.jinja => String"#]]
.assert_eq(&snapshot);
}
}
+970
View File
@@ -0,0 +1,970 @@
use std::collections::HashMap;
use serde::Serialize;
use serde_json::Value;
use thiserror_ext::AsReport as _;
use tracing::{info, trace, warn};
use vllm_text::Prompt;
use vllm_text::backend::hf::{
HfSpecialTokens, HfTokenizerConfig, ResolvedModelFiles, load_tokenizer_config,
};
use self::format::{
ChatTemplateContentFormat, ChatTemplateContentFormatOption as ContentFormatOption,
};
use self::template::{CompiledChatTemplate, TemplateContext};
use super::{ChatRenderer, RenderedPrompt};
use crate::error::Result;
use crate::request::{ChatContent, ChatContentPart, ChatMessage, ChatRequest};
use crate::{
AssistantContentBlock, AssistantMessageExt, ChatTool, Error, LoadModelBackendsOptions,
};
mod error;
mod format;
mod template;
mod tojson;
pub use template::{load_chat_template, resolve_chat_template};
pub use self::format::ChatTemplateContentFormatOption;
/// Rendering settings for multimodal content.
///
/// When a renderer has no `MultimodalRenderInfo`, image parts are rejected
/// with `Error::UnsupportedMultimodalContent`.
#[derive(Debug, Clone)]
pub struct MultimodalRenderInfo {
/// Text inserted in place of each image part when message content is
/// flattened into a single string.
pub placeholder_token: String,
}
/// Hugging Face chat-template renderer backed by the local Jinja chat-template
/// state.
pub struct HfChatRenderer {
// Template used when a request carries no `chat_template` override; `None`
// makes such requests fail with `Error::MissingChatTemplate`.
default_template: Option<CompiledChatTemplate>,
// Template kwargs applied to every request; per-request kwargs with the
// same key take precedence.
default_template_kwargs: HashMap<String, Value>,
// Content-format selection handed to every template compilation.
content_format: ContentFormatOption,
// Optional special tokens forwarded to the template context.
special_tokens: Option<HfSpecialTokens>,
// Optional multimodal settings; required for rendering image parts.
multimodal: Option<MultimodalRenderInfo>,
}
impl HfChatRenderer {
/// Create a renderer from the given template string.
pub fn new(
template: Option<String>,
default_template_kwargs: HashMap<String, Value>,
content_format: ContentFormatOption,
) -> Result<Self> {
Ok(Self {
default_template: template
.map(|template| {
CompiledChatTemplate::new(template, content_format)
.map_err(|error| Error::ChatTemplate(error.to_report_string()))
})
.transpose()?,
default_template_kwargs,
content_format,
special_tokens: None,
multimodal: None,
})
}
pub fn with_special_tokens(mut self, special_tokens: Option<HfSpecialTokens>) -> Self {
self.special_tokens = special_tokens;
self
}
pub fn with_multimodal(mut self, multimodal: Option<MultimodalRenderInfo>) -> Self {
self.multimodal = multimodal;
self
}
/// Create a renderer from the given model files and loading options.
pub fn load(
files: &ResolvedModelFiles,
options: LoadModelBackendsOptions,
multimodal: Option<MultimodalRenderInfo>,
) -> Result<Self> {
let HfTokenizerConfig {
special_tokens,
chat_template,
..
} = load_tokenizer_config(files.tokenizer_config_path.as_deref())?;
let mut template = chat_template;
let special_tokens = (!special_tokens.is_empty()).then_some(special_tokens);
if let Some(configured_template) = options.chat_template.as_deref() {
template = Some(
resolve_chat_template(configured_template)
.map_err(|error| Error::ChatTemplate(error.to_report_string()))?,
);
info!("using configured chat template override");
} else if let Some(chat_template_path) = files.chat_template_path.as_deref() {
// If independent chat template file(s) exist and contain non-empty content,
// they take priority over template entries in the tokenizer config
let file_template = load_chat_template(chat_template_path)
.map_err(|error| Error::ChatTemplate(error.to_report_string()))?;
if file_template.as_ref().is_some_and(|t| !t.trim().is_empty()) {
info!(
path = %chat_template_path.display(),
"loaded dedicated chat template file, overriding tokenizer_config chat_template"
);
template = file_template;
} else {
warn!(
path = %chat_template_path.display(),
"ignoring empty dedicated chat template file and falling back to tokenizer_config chat_template"
);
}
}
Ok(Self::new(
template,
options.default_chat_template_kwargs,
options.chat_template_content_format,
)?
.with_special_tokens(special_tokens)
.with_multimodal(multimodal))
}
/// Apply the chat template to one chat request, rendering the prompt string
/// to be tokenized and submitted to the model.
///
/// If the request carries a per-request `chat_template` override, a
/// temporary template is compiled from that string and used instead of
/// the model's default.
fn apply_chat_template(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
let override_template = request
.chat_options
.chat_template
.as_ref()
.map(|template| {
CompiledChatTemplate::new(template.clone(), self.content_format)
.map_err(|error| Error::ChatTemplate(error.to_report_string()))
})
.transpose()?;
let template = override_template
.as_ref()
.or(self.default_template.as_ref())
.ok_or(Error::MissingChatTemplate)?;
self.apply_chat_template_inner(template, request)
}
fn apply_chat_template_inner(
&self,
effective_template: &CompiledChatTemplate,
request: &ChatRequest,
) -> Result<RenderedPrompt> {
let messages = to_template_messages(
&request.messages,
effective_template.content_format(),
self.multimodal.as_ref(),
)?;
let tools = request.tool_parsing_enabled().then(|| to_template_tools(&request.tools));
trace!(
message_count = messages.len(),
content_format = ?effective_template.content_format(),
?messages,
?tools,
"applying chat template"
);
let mut merged_template_kwargs = self.default_template_kwargs.clone();
merged_template_kwargs.extend(request.chat_options.template_kwargs.clone());
let prompt = effective_template
.apply(TemplateContext {
messages: &messages,
add_generation_prompt: request.chat_options.add_generation_prompt(),
continue_final_message: request.chat_options.continue_final_message(),
tools: tools.as_deref(),
documents: request.documents.as_deref(),
template_kwargs: Some(&merged_template_kwargs),
special_tokens: self.special_tokens.as_ref(),
reasoning_effort: request.chat_options.reasoning_effort,
})
.map_err(|error| Error::ChatTemplate(error.to_report_string()))?;
trace!(
prompt_len = prompt.len(),
prompt, "rendered chat template prompt"
);
Ok(RenderedPrompt {
prompt: Prompt::Text(prompt),
})
}
}
impl ChatRenderer for HfChatRenderer {
    /// Renders `request` by applying the effective chat template.
    fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt> {
        Self::apply_chat_template(self, request)
    }
}
/// Chat message in the JSON shape expected by Jinja chat templates.
///
/// `skip_serializing_none` leaves out every optional field that is `None`
/// instead of serializing it as `null`.
// TODO: borrow more fields directly from the original `ChatMessage`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Serialize)]
struct TemplateMessage {
// One of "system", "developer", "user", "assistant", "tool".
role: &'static str,
// Message content as a flat string or a list of typed parts.
content: TemplateContent,
// Developer-role messages may provide message-local tools in the same shape
// as top-level request tools.
tools: Option<Vec<TemplateTool>>,
// Reasoning-capable HF templates are inconsistent on the exact field name,
// so expose both variants for compatibility.
reasoning: Option<String>,
reasoning_content: Option<String>,
// Function-call-capable templates commonly expect assistant tool calls
// under this OpenAI-compatible field name.
tool_calls: Option<Vec<TemplateToolCall>>,
// Tool-role messages refer back to the assistant call they are answering.
tool_call_id: Option<String>,
}
/// Chat content in the two shapes HF templates commonly expect.
///
/// Serialized untagged: either a bare string or a bare list of parts.
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum TemplateContent {
// Content flattened into one string.
String(String),
// Content as a list of typed parts.
OpenAi(Vec<TemplateContentPart>),
}
/// One structured content part; serialized with a `type` tag holding the
/// snake_case variant name (`text` or `image`).
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum TemplateContentPart {
// Text part carrying its text.
Text { text: String },
// Image part; carries no payload, only the `type` tag.
Image,
}
/// Assistant tool call in the shape exposed to templates.
#[derive(Debug, Serialize)]
struct TemplateToolCall {
// Id of the call, copied from the assistant tool call.
id: String,
r#type: &'static str, // always "function"
// Name and parsed arguments of the call.
function: TemplateToolFunction,
}
/// Function payload of a template tool call.
#[derive(Debug, Serialize)]
struct TemplateToolFunction {
// Name of the called function.
name: String,
// Call arguments as a parsed JSON value rather than the raw argument string.
arguments: Value,
}
/// Tool definition in the shape exposed to templates.
#[derive(Debug, Serialize)]
pub(super) struct TemplateTool {
// Serialized under the key `type`; `to_template_tools` always sets it to
// "function".
#[serde(rename = "type")]
tool_type: &'static str,
// The function definition itself.
function: TemplateToolDefinition,
}
/// Function definition of a template tool; a field-for-field copy of
/// `ChatTool`.
#[derive(Debug, Serialize)]
struct TemplateToolDefinition {
// Tool name.
name: String,
// Optional human-readable description.
description: Option<String>,
// Parameter description as a JSON value.
parameters: Value,
// Optional `strict` flag.
strict: Option<bool>,
}
/// Convert chat messages into the JSON shape expected by Jinja chat templates.
///
/// Messages are converted in order; the first conversion error aborts the
/// whole conversion.
fn to_template_messages(
    messages: &[ChatMessage],
    content_format: ChatTemplateContentFormat,
    multimodal: Option<&MultimodalRenderInfo>,
) -> Result<Vec<TemplateMessage>> {
    let mut rendered = Vec::with_capacity(messages.len());
    for message in messages {
        rendered.push(to_template_message(message, content_format, multimodal)?);
    }
    Ok(rendered)
}
fn to_template_message(
message: &ChatMessage,
content_format: ChatTemplateContentFormat,
multimodal: Option<&MultimodalRenderInfo>,
) -> Result<TemplateMessage> {
Ok(match message {
ChatMessage::System { content } => TemplateMessage {
role: "system",
content: to_template_content(content, content_format, multimodal)?,
tools: None,
reasoning: None,
reasoning_content: None,
tool_calls: None,
tool_call_id: None,
},
ChatMessage::Developer { content, tools } => TemplateMessage {
role: "developer",
content: to_template_content(content, content_format, multimodal)?,
tools: tools.as_deref().map(to_template_tools),
reasoning: None,
reasoning_content: None,
tool_calls: None,
tool_call_id: None,
},
ChatMessage::User { content } => TemplateMessage {
role: "user",
content: to_template_content(content, content_format, multimodal)?,
tools: None,
reasoning: None,
reasoning_content: None,
tool_calls: None,
tool_call_id: None,
},
ChatMessage::Assistant { content } => {
let text = content.text();
let reasoning = content.reasoning();
let tool_calls = to_template_tool_calls(content)?;
let content =
to_template_content(&ChatContent::Text(text), content_format, multimodal)?;
TemplateMessage {
role: "assistant",
content,
tools: None,
reasoning: reasoning.clone(),
reasoning_content: reasoning,
tool_calls,
tool_call_id: None,
}
}
ChatMessage::ToolResponse {
content,
tool_call_id,
} => TemplateMessage {
role: "tool",
content: to_template_content(content, content_format, multimodal)?,
tools: None,
reasoning: None,
reasoning_content: None,
tool_calls: None,
tool_call_id: Some(tool_call_id.clone()),
},
})
}
fn to_template_tool_calls(
content: &[AssistantContentBlock],
) -> Result<Option<Vec<TemplateToolCall>>> {
let mut tool_calls = Vec::new();
for tool_call in content.tool_calls() {
let arguments = serde_json::from_str::<Value>(&tool_call.arguments).map_err(|error| {
Error::ChatTemplate(format!(
"assistant tool call `{}` has invalid JSON arguments: {}",
tool_call.id,
error.as_report()
))
})?;
tool_calls.push(TemplateToolCall {
id: tool_call.id.clone(),
r#type: "function",
function: TemplateToolFunction {
name: tool_call.name.clone(),
arguments,
},
});
}
Ok((!tool_calls.is_empty()).then_some(tool_calls))
}
/// Converts chat content into the shape selected by `content_format`: a
/// flattened string or a list of typed parts.
fn to_template_content(
    content: &ChatContent,
    content_format: ChatTemplateContentFormat,
    multimodal: Option<&MultimodalRenderInfo>,
) -> Result<TemplateContent> {
    match content_format {
        ChatTemplateContentFormat::String => {
            to_template_string_content(content, multimodal).map(TemplateContent::String)
        }
        ChatTemplateContentFormat::OpenAi => {
            to_template_openai_content(content, multimodal).map(TemplateContent::OpenAi)
        }
    }
}
/// Converts chat content into a list of typed template parts.
///
/// Plain text becomes a single text part. Image parts require `multimodal`
/// to be set; otherwise `Error::UnsupportedMultimodalContent("image_url")`
/// is returned.
fn to_template_openai_content(
    content: &ChatContent,
    multimodal: Option<&MultimodalRenderInfo>,
) -> Result<Vec<TemplateContentPart>> {
    let parts = match content {
        ChatContent::Text(text) => {
            return Ok(vec![TemplateContentPart::Text { text: text.clone() }]);
        }
        ChatContent::Parts(parts) => parts,
    };

    let mut rendered = Vec::new();
    for part in parts.iter() {
        let converted = match part {
            ChatContentPart::Text { text } => TemplateContentPart::Text { text: text.clone() },
            // All multimodal contents are normalized to `{ "type": <modality> }`.
            ChatContentPart::ImageUrl { .. } => {
                multimodal.ok_or(Error::UnsupportedMultimodalContent("image_url"))?;
                TemplateContentPart::Image
            }
        };
        rendered.push(converted);
    }
    Ok(rendered)
}
fn to_template_string_content(
content: &ChatContent,
multimodal: Option<&MultimodalRenderInfo>,
) -> Result<String> {
match content {
ChatContent::Text(text) => Ok(text.clone()),
ChatContent::Parts(parts) => {
let mut out = String::new();
for part in parts {
match part {
ChatContentPart::Text { text } => out.push_str(text),
ChatContentPart::ImageUrl { .. } => {
let multimodal =
multimodal.ok_or(Error::UnsupportedMultimodalContent("image_url"))?;
out.push_str(&multimodal.placeholder_token);
}
}
}
Ok(out)
}
}
}
/// Converts request tools into the shape exposed to templates; every entry
/// gets the type `"function"` and a field-for-field copy of the tool.
fn to_template_tools(tools: &[ChatTool]) -> Vec<TemplateTool> {
    let mut rendered = Vec::with_capacity(tools.len());
    for tool in tools {
        let function = TemplateToolDefinition {
            name: tool.name.clone(),
            description: tool.description.clone(),
            parameters: tool.parameters.clone(),
            strict: tool.strict,
        };
        rendered.push(TemplateTool {
            tool_type: "function",
            function,
        });
    }
    rendered
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use expect_test::expect;
use serde_json::Value;
use vllm_text::Prompt;
use vllm_text::backend::hf::{HfSpecialTokens, NamedSpecialToken};
use super::{ChatTemplateContentFormatOption, HfChatRenderer, MultimodalRenderInfo};
use crate::request::{
ChatContentPart, ChatMessage, ChatRequest, ChatRole, ChatTool, ChatToolChoice,
GenerationPromptMode, ReasoningEffort,
};
use crate::{AssistantContentBlock, ChatRenderer, Error, Result};
const QWEN3_0_6B_TEMPLATE: &str = include_str!("../../../tests/templates/qwen3.jinja");
const QWEN3_5_0_8B_TEMPLATE: &str = include_str!("../../../tests/templates/qwen35.jinja");
fn sample_request(messages: Vec<ChatMessage>) -> ChatRequest {
ChatRequest {
messages,
request_id: "render-test".to_string(),
..ChatRequest::for_test()
}
}
fn render(template: Option<&str>, request: &ChatRequest) -> Result<String> {
HfChatRenderer::new(
template.map(str::to_owned),
HashMap::new(),
ChatTemplateContentFormatOption::Auto,
)?
.render(request)?
.prompt
.into_text()
.map_err(|_| unreachable!("HF renderer should return text prompt"))
}
fn render_mm(
template: &str,
request: &ChatRequest,
content_format: ChatTemplateContentFormatOption,
) -> Result<crate::RenderedPrompt> {
HfChatRenderer::new(Some(template.to_string()), HashMap::new(), content_format)?
.with_multimodal(Some(MultimodalRenderInfo {
placeholder_token: "<image>".to_string(),
}))
.render(request)
}
fn image_request() -> ChatRequest {
sample_request(vec![ChatMessage::user(vec![
ChatContentPart::text("a"),
ChatContentPart::image_url("data:image/png;base64,test"),
ChatContentPart::text("b"),
])])
}
#[test]
fn string_content_format_replaces_image_with_placeholder_text() {
let rendered = render_mm(
"{{ messages[0].content }}",
&image_request(),
ChatTemplateContentFormatOption::String,
)
.unwrap();
assert_eq!(rendered.prompt, Prompt::Text("a<image>b".to_string()));
}
#[test]
fn openai_content_format_normalizes_image_url_for_template() {
let rendered = render_mm(
"{% for item in messages[0].content %}{% if item.type == 'image' %}<|image_pad|>{% else %}{{ item.text }}{% endif %}{% endfor %}",
&image_request(),
ChatTemplateContentFormatOption::OpenAi,
)
.unwrap();
assert_eq!(rendered.prompt, Prompt::Text("a<|image_pad|>b".to_string()));
}
#[test]
fn chat_template_supports_pycompat_templates() {
let request = sample_request(vec![ChatMessage::text(ChatRole::User, "<think>hello")]);
let rendered = render(
Some(
"{% for message in messages %}{% if message.content.startswith('<think>') %}think{% else %}plain{% endif %}{% endfor %}",
),
&request,
)
.unwrap();
assert_eq!(rendered, "think");
}
#[test]
fn chat_template_passes_continue_final_message_to_template() {
    let assistant_message = ChatMessage::text(ChatRole::Assistant, "The capital of");
    let mut request = sample_request(vec![assistant_message]);

    // Default generation prompt mode: the flag is falsy in the template.
    let before = render(
        Some("{% if continue_final_message %}continue{% else %}new{% endif %}"),
        &request,
    )
    .unwrap();
    assert_eq!(before, "new");

    // Switching the mode flips the flag seen by the same template.
    request.chat_options.generation_prompt_mode = GenerationPromptMode::ContinueFinalAssistant;
    let after = render(
        Some("{% if continue_final_message %}continue{% else %}new{% endif %}"),
        &request,
    )
    .unwrap();
    assert_eq!(after, "continue");
}
#[test]
fn chat_template_flattens_text_parts_for_string_templates() {
    // A template that prints `content` directly receives the text parts
    // concatenated without any separator.
    let parts = vec![
        ChatContentPart::text("hello"),
        ChatContentPart::text(" world"),
    ];
    let request = sample_request(vec![ChatMessage::user(parts)]);
    let output = render(Some("{{ messages[0].content }}"), &request).unwrap();
    assert_eq!(output, "hello world");
}
#[test]
fn chat_template_exposes_developer_tools() {
    // Tools attached to a developer message are reachable from the template
    // as `messages[i].tools[j].function.*`.
    let weather_tool = ChatTool {
        name: "get_weather".to_string(),
        description: Some("Get weather".to_string()),
        parameters: serde_json::json!({
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }),
        strict: Some(true),
    };
    let developer_message = ChatMessage::developer("policy", Some(vec![weather_tool]));
    let request = sample_request(vec![developer_message]);
    let template = "{{ messages[0].role }}|{{ messages[0].content }}|{{ messages[0].tools[0].function.name }}|{{ messages[0].tools[0].function.parameters.required[0] }}";
    let output = render(Some(template), &request).unwrap();
    assert_eq!(output, "developer|policy|get_weather|city");
}
#[test]
fn chat_template_keeps_string_text_for_openai_detected_templates() {
    // Plain string content must still satisfy `is string` inside the template.
    let template = "{%- for message in messages %}{%- if message.content is string %}{%- set content = message.content %}{{ content }}{%- endif %}{%- endfor %}";
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let request = sample_request(vec![user_message]);
    let output = render(Some(template), &request).unwrap();
    assert_eq!(output, "hello");
}
#[test]
fn chat_template_emits_openai_text_blocks_for_structured_templates() {
    // A template that iterates over `content` receives one item per text part.
    let template = "{%- for message in messages %}{%- for item in message.content %}{{ item.text }}|{%- endfor %}{%- endfor %}";
    let parts = vec![
        ChatContentPart::text("hello"),
        ChatContentPart::text("world"),
    ];
    let request = sample_request(vec![ChatMessage::user(parts)]);
    let output = render(Some(template), &request).unwrap();
    assert_eq!(output, "hello|world|");
}
#[test]
fn chat_template_per_request_override() {
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let mut request = sample_request(vec![user_message]);

    // Without an override the renderer's own template is used.
    let baseline = render(Some("{{ messages[0].content }}"), &request).unwrap();
    assert_eq!(baseline, "hello");

    // A template set on the request takes the place of the default one.
    request.chat_options.chat_template = Some("override:{{ messages[0].content }}".to_string());
    let with_override = render(Some("{{ messages[0].content }}"), &request).unwrap();
    assert_eq!(with_override, "override:hello");
}
#[test]
fn chat_template_per_request_override_without_default_template() {
    // A per-request template is sufficient even when the renderer was built
    // without any default template.
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let mut request = sample_request(vec![user_message]);
    request.chat_options.chat_template = Some("override:{{ messages[0].content }}".to_string());
    let output = render(None, &request).unwrap();
    assert_eq!(output, "override:hello");
}
#[test]
fn chat_template_requires_a_template() {
    // Neither a default nor a per-request template: rendering must fail with
    // `MissingChatTemplate`.
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let request = sample_request(vec![user_message]);
    let outcome = render(None, &request);
    assert!(matches!(outcome.unwrap_err(), Error::MissingChatTemplate));
}
#[test]
fn chat_template_injects_special_tokens_into_context() {
    // A configured `bos_token` is visible to the template as a defined
    // top-level variable.
    let mut special_tokens = HfSpecialTokens::default();
    special_tokens.bos_token = Some(NamedSpecialToken::Text("<bos>".to_string()));

    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let request = sample_request(vec![user_message]);

    let renderer = HfChatRenderer::new(
        Some("{{ bos_token }}|{{ bos_token is defined }}".to_string()),
        HashMap::new(),
        ChatTemplateContentFormatOption::Auto,
    )
    .unwrap()
    .with_special_tokens(Some(special_tokens));
    let output = renderer.apply_chat_template(&request).unwrap();
    assert_eq!(output.prompt, Prompt::Text("<bos>|true".to_string()));
}
#[test]
fn chat_template_exposes_assistant_reasoning_separately() {
    // Reasoning blocks land in `reasoning_content`, text blocks in `content`.
    let reasoning = AssistantContentBlock::Reasoning {
        text: "inner".to_string(),
    };
    let answer = AssistantContentBlock::Text {
        text: "outer".to_string(),
    };
    let assistant_message = ChatMessage::assistant_blocks(vec![reasoning, answer]);
    let request = sample_request(vec![assistant_message]);
    let output = render(
        Some("{{ messages[0].reasoning_content }}|{{ messages[0].content }}"),
        &request,
    )
    .unwrap();
    assert_eq!(output, "inner|outer");
}
#[test]
fn chat_template_forces_string_content_format_when_configured() {
    // The template handles both shapes; forcing `String` must select the
    // string branch and yield the concatenated text.
    let template = "{%- if messages[0].content is string -%}{{ messages[0].content }}{%- else -%}{%- for item in messages[0].content %}{{ item.text }}|{%- endfor -%}{%- endif -%}".to_string();
    let parts = vec![
        ChatContentPart::text("hello"),
        ChatContentPart::text(" world"),
    ];
    let request = sample_request(vec![ChatMessage::user(parts)]);
    let renderer = HfChatRenderer::new(
        Some(template),
        HashMap::new(),
        ChatTemplateContentFormatOption::String,
    )
    .unwrap();
    let output = renderer.render(&request).unwrap();
    assert_eq!(output.prompt, Prompt::Text("hello world".to_string()));
}
#[test]
fn chat_template_forces_openai_content_format_when_configured() {
    // Forcing `OpenAi` lets the template index individual content items.
    let template = "{{ messages[0].content[0].text }}{{ messages[0].content[1].text }}".to_string();
    let parts = vec![
        ChatContentPart::text("hello"),
        ChatContentPart::text(" world"),
    ];
    let request = sample_request(vec![ChatMessage::user(parts)]);
    let renderer = HfChatRenderer::new(
        Some(template),
        HashMap::new(),
        ChatTemplateContentFormatOption::OpenAi,
    )
    .unwrap();
    let output = renderer.render(&request).unwrap();
    assert_eq!(output.prompt, Prompt::Text("hello world".to_string()));
}
#[test]
fn chat_template_merges_default_template_kwargs_before_request_kwargs() {
    // Request kwargs win over renderer defaults for the same key, while
    // default-only keys remain visible.
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let mut request = sample_request(vec![user_message]);
    request
        .chat_options
        .template_kwargs
        .insert("enable_thinking".to_string(), Value::Bool(true));

    let mut default_kwargs = HashMap::new();
    default_kwargs.insert("enable_thinking".to_string(), Value::Bool(false));
    default_kwargs.insert("default_only".to_string(), Value::String("x".to_string()));

    let renderer = HfChatRenderer::new(
        Some("{{ enable_thinking }}|{{ default_only }}".to_string()),
        default_kwargs,
        ChatTemplateContentFormatOption::Auto,
    )
    .unwrap();
    let output = renderer.render(&request).unwrap();
    assert_eq!(output.prompt, Prompt::Text("true|x".to_string()));
}
#[test]
fn chat_template_reasoning_effort_overrides_template_kwargs() {
    // The dedicated `reasoning_effort` option beats a same-named entry in
    // both the request kwargs and the renderer's default kwargs.
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let mut request = sample_request(vec![user_message]);
    request.chat_options.reasoning_effort = Some(ReasoningEffort::Max);
    request.chat_options.template_kwargs.insert(
        "reasoning_effort".to_string(),
        Value::String("low".to_string()),
    );

    let mut default_kwargs = HashMap::new();
    default_kwargs.insert(
        "reasoning_effort".to_string(),
        Value::String("medium".to_string()),
    );

    let renderer = HfChatRenderer::new(
        Some("{{ reasoning_effort }}".to_string()),
        default_kwargs,
        ChatTemplateContentFormatOption::Auto,
    )
    .unwrap();
    let output = renderer.render(&request).unwrap();
    assert_eq!(output.prompt, Prompt::Text("max".to_string()));
}
// Snapshot test: an assistant turn that is followed by a later user message is
// rendered with its text only; the expected prompt below contains no reasoning
// for that turn and ends with an open assistant header.
#[test]
fn qwen3_template_omits_reasoning_for_historical_assistant_messages() {
let request = sample_request(vec![
ChatMessage::text(
ChatRole::User,
"Hi. Tell me about the capital of France in short",
),
ChatMessage::assistant_blocks(vec![
AssistantContentBlock::Reasoning {
text: "\nOkay, the user is asking... I think that's all.\n".to_string(),
},
AssistantContentBlock::Text {
text: "Paris is the capital of France.".to_string(),
},
]),
ChatMessage::text(ChatRole::User, "Tell me about Paris more."),
]);
let rendered = render(Some(QWEN3_0_6B_TEMPLATE), &request).unwrap();
expect![[r#"
<|im_start|>user
Hi. Tell me about the capital of France in short<|im_end|>
<|im_start|>assistant
Paris is the capital of France.<|im_end|>
<|im_start|>user
Tell me about Paris more.<|im_end|>
<|im_start|>assistant
"#]]
.assert_eq(&rendered);
}
// Snapshot test: when the assistant turn comes after the last user message and
// no generation prompt is requested, its reasoning is rendered inside a
// `<think>` block ahead of the answer text.
#[test]
fn qwen3_template_keeps_reasoning_after_the_last_user_query() {
let mut request = sample_request(vec![
ChatMessage::text(ChatRole::User, "What is 1 + 1?"),
ChatMessage::assistant_blocks(vec![
AssistantContentBlock::Reasoning {
text: "need simple arithmetic".to_string(),
},
AssistantContentBlock::Text {
text: "2".to_string(),
},
]),
]);
// Suppress the trailing assistant header so the snapshot ends at the answer.
request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
let rendered = render(Some(QWEN3_0_6B_TEMPLATE), &request).unwrap();
expect![[r#"
<|im_start|>user
What is 1 + 1?<|im_end|>
<|im_start|>assistant
<think>
need simple arithmetic
</think>
2<|im_end|>
"#]]
.assert_eq(&rendered);
}
#[test]
fn chat_template_exposes_tools_to_templates_when_auto_enabled() {
    // Request-level tools are reachable as `tools[i].function.*` when the
    // tool choice is `Auto`.
    let weather_tool = ChatTool {
        name: "get_weather".to_string(),
        description: Some("Get weather".to_string()),
        parameters: serde_json::json!({
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }),
        strict: None,
    };
    let user_message = ChatMessage::text(ChatRole::User, "hello");
    let mut request = sample_request(vec![user_message]);
    request.tools = vec![weather_tool];
    request.tool_choice = ChatToolChoice::Auto;

    let template = "{{ tools[0].function.name }}|{{ tools[0].function.parameters.required[0] }}";
    let output = render(Some(template), &request).unwrap();
    assert_eq!(output, "get_weather|city");
}
#[test]
fn chat_template_exposes_assistant_tool_calls_and_tool_messages() {
    // The template can read the assistant's tool call (with its JSON
    // arguments addressable by key) and the matching tool response.
    let tool_call = crate::AssistantToolCall {
        id: "call_1".to_string(),
        name: "get_weather".to_string(),
        arguments: r#"{"city":"Paris"}"#.to_string(),
    };
    let assistant_message =
        ChatMessage::assistant_blocks(vec![AssistantContentBlock::ToolCall(tool_call)]);
    let tool_message = ChatMessage::tool_response("Sunny", "call_1");
    let request = sample_request(vec![assistant_message, tool_message]);

    let template = "{{ messages[0].tool_calls[0].function.name }}|{{ messages[0].tool_calls[0].function.arguments.city }}|{{ messages[1].tool_call_id }}|{{ messages[1].content }}";
    let output = render(Some(template), &request).unwrap();
    assert_eq!(output, "get_weather|Paris|call_1|Sunny");
}
// Snapshot test: with `enable_thinking = true` the generation prompt ends with
// an opening `<think>` tag.
#[test]
fn qwen35_template_renders_prefilled_reasoning_start_when_thinking_enabled() {
let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
request
.chat_options
.template_kwargs
.insert("enable_thinking".to_string(), Value::Bool(true));
let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
expect![[r#"
<|im_start|>user
hello<|im_end|>
<|im_start|>assistant
<think>
"#]]
.assert_eq(&rendered);
}
// Snapshot test: with `enable_thinking = false` the generation prompt contains
// an already-closed, empty `<think>` span.
#[test]
fn qwen35_template_renders_closed_empty_reasoning_span_when_thinking_disabled() {
let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
request
.chat_options
.template_kwargs
.insert("enable_thinking".to_string(), Value::Bool(false));
let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
expect![[r#"
<|im_start|>user
hello<|im_end|>
<|im_start|>assistant
<think>
</think>
"#]]
.assert_eq(&rendered);
}
// Snapshot test: without a generation prompt no assistant header or `<think>`
// prefill is emitted, even though `enable_thinking` is true.
#[test]
fn qwen35_template_omits_assistant_reasoning_prefill_without_generation_prompt() {
let mut request = sample_request(vec![ChatMessage::text(ChatRole::User, "hello")]);
request.chat_options.generation_prompt_mode = GenerationPromptMode::NoGenerationPrompt;
request
.chat_options
.template_kwargs
.insert("enable_thinking".to_string(), Value::Bool(true));
let rendered = render(Some(QWEN3_5_0_8B_TEMPLATE), &request).unwrap();
expect![[r#"
<|im_start|>user
hello<|im_end|>
"#]]
.assert_eq(&rendered);
}
}
+316
View File
@@ -0,0 +1,316 @@
//! Chat template support for tokenizers using Jinja2 templates.
//!
//! This module is inlined from SMG's tokenizer crate with local adaptations:
//! - thinking-related detection/state is removed
//! - special tokens are wired to `vllm_text::backend::hf::HfSpecialTokens`
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use minijinja::Environment;
use serde::{Deserialize, Serialize};
use serde_json::{self};
use vllm_text::backend::hf::HfSpecialTokens;
use super::error::TemplateError;
use super::format::{
ChatTemplateContentFormat, ChatTemplateContentFormatOption, detect_chat_template_content_format,
};
use super::tojson::hf_tojson_filter;
use crate::renderer::hf::{TemplateMessage, TemplateTool};
use crate::request::ReasoningEffort;
/// Module-local result alias whose error type is [`TemplateError`].
type Result<T> = std::result::Result<T, TemplateError>;
/// Create a MiniJinja environment holding `template` under the name `chat`.
///
/// The environment has `trim_blocks` and `lstrip_blocks` enabled, the
/// Python-compatibility unknown-method callback installed, and the `tojson`
/// filter replaced by [`hf_tojson_filter`]. Returns an error if the template
/// cannot be added.
fn build_environment(template: String) -> Result<Environment<'static>> {
    let mut environment = Environment::new();
    environment.set_trim_blocks(true);
    environment.set_lstrip_blocks(true);
    environment.add_filter("tojson", hf_tojson_filter);
    environment.set_unknown_method_callback(minijinja_contrib::pycompat::unknown_method_callback);
    environment.add_template_owned("chat".to_owned(), template)?;
    Ok(environment)
}
/// Serializable root context handed to the template when it is rendered.
///
/// `Option` fields that are `None` are left out of the serialized context
/// (`skip_serializing_none`), so templates see them as undefined. The two
/// `flatten`ed fields contribute their entries as top-level variables.
#[serde_with::skip_serializing_none]
#[derive(Default, Serialize)]
pub(super) struct TemplateContext<'a> {
/// Conversation messages exposed to the template as `messages`.
pub(super) messages: &'a [TemplateMessage],
/// Exposed to the template as `add_generation_prompt`.
pub(super) add_generation_prompt: bool,
/// Exposed to the template as `continue_final_message`.
pub(super) continue_final_message: bool,
/// Tool definitions exposed as `tools`; omitted when `None`.
pub(super) tools: Option<&'a [TemplateTool]>,
/// Documents exposed as `documents`; omitted when `None`.
pub(super) documents: Option<&'a [serde_json::Value]>,
/// Special tokens, flattened so each one becomes a top-level variable.
#[serde(flatten)]
pub(super) special_tokens: Option<&'a HfSpecialTokens>,
/// Extra template variables, flattened into the top level.
#[serde(flatten)]
pub(super) template_kwargs: Option<&'a HashMap<String, serde_json::Value>>,
// By putting top-level `reasoning_effort` after `template_kwargs`, this overrides any
// `reasoning_effort` value that might be present there.
pub(super) reasoning_effort: Option<ReasoningEffort>,
}
/// Read a chat template from `template_path`.
///
/// Files with a `.json` extension must contain either a JSON string (the
/// template itself) or a JSON object with a string `chat_template` field. Any
/// other file is treated as raw Jinja: it is trimmed and every literal `\n`
/// two-character sequence is replaced by a real newline.
///
/// Returns `Ok(Some(template))` on success; errors are reported as
/// `ReadTemplateFile`, `ParseTemplateJson`, or `InvalidTemplateJson`.
pub fn load_chat_template(template_path: &Path) -> Result<Option<String>> {
    /// Accepted shapes of a JSON chat template file.
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum ChatTemplateFile {
        String(String),
        Object { chat_template: String },
    }

    let raw = fs::read_to_string(template_path).map_err(TemplateError::ReadTemplateFile)?;

    let is_json = template_path.extension().is_some_and(|ext| ext == "json");
    if !is_json {
        let unescaped = raw.trim().replace("\\n", "\n");
        return Ok(Some(unescaped));
    }

    // Two-step parse so that malformed JSON and well-formed JSON of the wrong
    // shape map to different errors.
    let parsed: serde_json::Value =
        serde_json::from_str(&raw).map_err(TemplateError::ParseTemplateJson)?;
    let file: ChatTemplateFile =
        serde_json::from_value(parsed).map_err(|_| TemplateError::InvalidTemplateJson)?;
    let template = match file {
        ChatTemplateFile::String(inline) => inline,
        ChatTemplateFile::Object { chat_template } => chat_template,
    };
    Ok(Some(template))
}
/// Turn a configured chat template value into the template source.
///
/// If `chat_template` names an existing path, the template is loaded from
/// it. Otherwise the value is accepted as an inline template when it contains
/// `{`, `}`, or a newline; anything else yields `MissingTemplatePath`.
pub fn resolve_chat_template(chat_template: &str) -> Result<String> {
    let candidate = Path::new(chat_template);
    if candidate.exists() {
        let loaded = load_chat_template(candidate)?;
        return Ok(loaded.unwrap_or_default());
    }

    let looks_inline = chat_template
        .chars()
        .any(|c| matches!(c, '{' | '}' | '\n'));
    if looks_inline {
        Ok(chat_template.to_string())
    } else {
        Err(TemplateError::MissingTemplatePath)
    }
}
/// One compiled chat template with its Jinja environment and detected content
/// format.
pub(super) struct CompiledChatTemplate {
/// Cached, fully-configured environment for one compiled template.
env: Environment<'static>,
/// Content format chosen at construction: auto-detected from the template
/// source or forced by the caller.
content_format: ChatTemplateContentFormat,
}
impl CompiledChatTemplate {
/// Compile the given chat template string into a [`CompiledChatTemplate`].
pub fn new(template: String, content_format: ChatTemplateContentFormatOption) -> Result<Self> {
let content_format = match content_format {
ChatTemplateContentFormatOption::Auto => detect_chat_template_content_format(&template),
ChatTemplateContentFormatOption::String => ChatTemplateContentFormat::String,
ChatTemplateContentFormatOption::OpenAi => ChatTemplateContentFormat::OpenAi,
};
let env = build_environment(template)?;
Ok(Self {
env,
content_format,
})
}
/// Apply the compiled template to the given context and return the rendered
/// prompt.
pub fn apply(&self, ctx: TemplateContext<'_>) -> Result<String> {
let tmpl = self.env.get_template("chat")?;
tmpl.render(ctx).map_err(TemplateError::from)
}
pub fn content_format(&self) -> ChatTemplateContentFormat {
self.content_format
}
}
#[cfg(test)]
mod tests {
use std::fs;
use tempfile::TempDir;
use vllm_text::backend::hf::{HfSpecialTokens, NamedSpecialToken};
use super::*;
#[test]
fn test_chat_template_state_valid_template() {
    // A trivial template compiles, is detected as string-format, and renders
    // the default (empty) message list.
    let source = "{{ messages }}".to_string();
    let compiled =
        CompiledChatTemplate::new(source, ChatTemplateContentFormatOption::Auto).unwrap();
    assert_eq!(compiled.content_format(), ChatTemplateContentFormat::String);

    let output = compiled.apply(TemplateContext::default()).unwrap();
    assert_eq!(output, "[]");
}
#[test]
fn test_chat_template_state_invalid_template() {
    // An unterminated block tag must be rejected at compile time.
    let source = "{% invalid".to_string();
    let outcome = CompiledChatTemplate::new(source, ChatTemplateContentFormatOption::Auto);
    assert!(outcome.is_err());

    let err = outcome.err().unwrap().to_string();
    assert!(
        err.contains("failed to render jinja template"),
        "Error should explain parse failure, got: {err}"
    );
}
#[test]
fn test_special_tokens_injected_into_context() {
    // Both configured special tokens are available as top-level variables.
    let compiled = CompiledChatTemplate::new(
        "{{ bos_token }}hello{{ eos_token }}".to_string(),
        ChatTemplateContentFormatOption::Auto,
    )
    .unwrap();

    let mut special_tokens = HfSpecialTokens::default();
    special_tokens.bos_token = Some(NamedSpecialToken::Text("<s>".to_string()));
    special_tokens.eos_token = Some(NamedSpecialToken::Text("</s>".to_string()));

    let mut ctx = TemplateContext::default();
    ctx.special_tokens = Some(&special_tokens);

    let output = compiled.apply(ctx).unwrap();
    assert_eq!(output, "<s>hello</s>");
}
#[test]
fn test_special_tokens_undefined_when_not_provided() {
    // With no special tokens in the context, `bos_token` is undefined.
    let source = "{% if bos_token is defined %}{{ bos_token }}{% endif %}hello".to_string();
    let compiled =
        CompiledChatTemplate::new(source, ChatTemplateContentFormatOption::Auto).unwrap();
    let output = compiled.apply(TemplateContext::default()).unwrap();
    assert_eq!(output, "hello");
}
#[test]
fn test_special_tokens_partial() {
    // Only `bos_token` is set; `eos_token` must stay undefined.
    let source =
        "{{ bos_token }}hello{% if eos_token is defined %}{{ eos_token }}{% endif %}".to_string();
    let compiled =
        CompiledChatTemplate::new(source, ChatTemplateContentFormatOption::Auto).unwrap();

    let mut special_tokens = HfSpecialTokens::default();
    special_tokens.bos_token = Some(NamedSpecialToken::Text("<s>".to_string()));
    special_tokens.eos_token = None;

    let mut ctx = TemplateContext::default();
    ctx.special_tokens = Some(&special_tokens);

    let output = compiled.apply(ctx).unwrap();
    assert_eq!(output, "<s>hello");
}
#[test]
fn test_tojson_filter_supports_indent_and_sort_keys() {
    // The compiled environment must use the HF-compatible `tojson` filter,
    // which understands `indent` and `sort_keys`.
    let template = CompiledChatTemplate::new(
        "{{ payload | tojson(indent=2, sort_keys=true) }}".to_string(),
        ChatTemplateContentFormatOption::Auto,
    )
    .unwrap();
    let mut kwargs = HashMap::new();
    kwargs.insert("payload".to_string(), serde_json::json!({"b": 1, "a": 2}));
    let result = template
        .apply(TemplateContext {
            template_kwargs: Some(&kwargs),
            ..Default::default()
        })
        .unwrap();
    // `indent=2` means two spaces per nesting level, and keys come out sorted.
    assert_eq!(result, "{\n  \"a\": 2,\n  \"b\": 1\n}");
}
#[test]
fn test_load_chat_template_from_file_jinja() {
    // A `.jinja` file is returned as-is.
    let workdir = TempDir::new().unwrap();
    let template_file = workdir.path().join("chat_template.jinja");
    fs::write(&template_file, "{{ messages }}").unwrap();

    let loaded = load_chat_template(&template_file).unwrap();
    assert_eq!(loaded.as_deref(), Some("{{ messages }}"));
}
#[test]
fn test_resolve_chat_template_from_inline_literal() {
    // A value containing Jinja braces is accepted as an inline template.
    let inline = "{{ messages }}";
    let resolved = resolve_chat_template(inline).unwrap();
    assert_eq!(resolved, "{{ messages }}");
}
#[test]
fn test_resolve_chat_template_from_existing_file() {
    // A value naming an existing file is loaded from disk.
    let workdir = TempDir::new().unwrap();
    let template_file = workdir.path().join("chat_template.jinja");
    fs::write(&template_file, "{{ messages }}").unwrap();

    let path_text = template_file.to_str().unwrap();
    let resolved = resolve_chat_template(path_text).unwrap();
    assert_eq!(resolved, "{{ messages }}");
}
#[test]
fn test_resolve_chat_template_rejects_missing_path_like_value() {
    // Neither an existing path nor something that looks like Jinja.
    let outcome = resolve_chat_template("missing_template.jinja");
    assert!(matches!(
        outcome.unwrap_err(),
        TemplateError::MissingTemplatePath
    ));
}
#[test]
fn test_chat_template_state_respects_explicit_content_format_override() {
    // The template iterates over content items, yet an explicit `String`
    // option must still be honoured.
    let source = "{% for item in messages[0].content %}{{ item.text }}{% endfor %}".to_string();
    let compiled =
        CompiledChatTemplate::new(source, ChatTemplateContentFormatOption::String).unwrap();
    assert_eq!(compiled.content_format(), ChatTemplateContentFormat::String);
}
#[test]
fn test_load_chat_template_from_file_json_string() {
    // A `.json` file holding a bare JSON string yields that string.
    let workdir = TempDir::new().unwrap();
    let template_file = workdir.path().join("chat_template.json");
    fs::write(&template_file, "\"{{ messages }}\"").unwrap();

    let loaded = load_chat_template(&template_file).unwrap();
    assert_eq!(loaded.as_deref(), Some("{{ messages }}"));
}
#[test]
fn test_load_chat_template_from_file_json_object() {
    // A `.json` file holding an object yields its `chat_template` field.
    let workdir = TempDir::new().unwrap();
    let template_file = workdir.path().join("chat_template.json");
    fs::write(&template_file, r#"{"chat_template":"{{ messages }}"}"#).unwrap();

    let loaded = load_chat_template(&template_file).unwrap();
    assert_eq!(loaded.as_deref(), Some("{{ messages }}"));
}
}
+277
View File
@@ -0,0 +1,277 @@
use minijinja::value::{Kwargs, ViaDeserialize};
use minijinja::{Error as MinijinjaError, ErrorKind, Value};
use serde::Deserialize;
use serde_json::{self, Value as JsonValue};
use serde_json_fmt::{JsonFormat, JsonSyntaxError};
use thiserror_ext::AsReport;
/// `tojson` filter for chat templates that follows Hugging Face behaviour.
///
/// MiniJinja's built-in filter is not used because HF templates rely on
/// Python `json.dumps` semantics:
/// - no HTML escaping
/// - extra kwargs such as `ensure_ascii`, `separators`, and `sort_keys`
/// - Python-style `indent` handling
///
/// Accepted keyword arguments are `ensure_ascii` (default `false`),
/// `indent`, `separators`, and `sort_keys` (default `false`); any other
/// keyword argument is an error. The result is returned as a safe string.
pub(super) fn hf_tojson_filter(
    value: Value,
    kwargs: Kwargs,
) -> std::result::Result<Value, MinijinjaError> {
    // Read every supported keyword argument before `assert_all_used` so that
    // only genuinely unknown arguments are rejected.
    let ensure_ascii = kwargs.get::<Option<bool>>("ensure_ascii")?.unwrap_or(false);
    let indent_arg = kwargs
        .get::<Option<ViaDeserialize<IndentArg>>>("indent")?
        .map(|arg| arg.0);
    let indent = parse_indent(indent_arg);
    let separators_arg = kwargs
        .get::<Option<ViaDeserialize<SeparatorsArg>>>("separators")?
        .map(|arg| arg.0);
    let (item_separator, key_separator) = parse_separators(separators_arg, indent.is_some());
    let sort_keys = kwargs.get::<Option<bool>>("sort_keys")?.unwrap_or(false);
    kwargs.assert_all_used()?;

    let json_value = match serde_json::to_value(&value) {
        Ok(converted) if sort_keys => sort_json_keys(&converted),
        Ok(converted) => converted,
        Err(e) => {
            return Err(MinijinjaError::new(
                ErrorKind::InvalidOperation,
                format!("Failed to convert to JSON value: {e}"),
            ));
        }
    };

    let formatter = build_json_format(indent, item_separator, key_separator, ensure_ascii)?;
    let json_str = match formatter.format_to_string(&json_value) {
        Ok(rendered) => rendered,
        Err(e) => {
            return Err(MinijinjaError::new(
                ErrorKind::InvalidOperation,
                format!("Failed to serialize JSON: {}", e.as_report()),
            ));
        }
    };
    Ok(Value::from_safe_string(json_str))
}
/// Accepted shapes of the `indent` keyword argument of `tojson`.
///
/// Deserialized untagged, so the variants are tried in declaration order.
#[derive(Deserialize)]
#[serde(untagged)]
enum IndentArg {
// Python `json.dumps` accepts bool, int, and string indentation styles.
Bool(bool),
Integer(i64),
String(String),
}
/// Convert the optional `indent` argument into the indentation string.
///
/// Returns `None` when no `indent` was given. `true` maps to one space, a
/// positive integer to that many spaces, `false` and non-positive integers to
/// the empty string, and a string is used verbatim.
fn parse_indent(value: Option<IndentArg>) -> Option<String> {
    let unit = match value? {
        IndentArg::Bool(true) => " ".to_owned(),
        IndentArg::Integer(width) if width > 0 => " ".repeat(width as usize),
        IndentArg::Bool(false) | IndentArg::Integer(_) => String::new(),
        IndentArg::String(custom) => custom,
    };
    Some(unit)
}
/// The `separators` keyword argument of `tojson`: a two-element sequence of
/// `(item_separator, key_separator)`.
#[derive(Deserialize)]
struct SeparatorsArg((String, String));
/// Resolve the `(item_separator, key_separator)` pair.
///
/// Explicit separators are returned unchanged. Otherwise the key separator is
/// `": "` and the item separator is `","` when `pretty` is set and `", "`
/// when it is not.
fn parse_separators(value: Option<SeparatorsArg>, pretty: bool) -> (String, String) {
    match value {
        Some(SeparatorsArg(explicit)) => explicit,
        None => {
            let item_separator = if pretty { "," } else { ", " };
            (item_separator.to_owned(), ": ".to_owned())
        }
    }
}
/// Assemble a `JsonFormat` from the resolved filter options.
///
/// Each of `indent`, `item_separator`, and `key_separator` is validated by
/// `JsonFormat`; a rejected value is reported as an `InvalidOperation` error
/// naming the offending option.
fn build_json_format(
    indent: Option<String>,
    item_separator: String,
    key_separator: String,
    ensure_ascii: bool,
) -> std::result::Result<JsonFormat, MinijinjaError> {
    let with_indent = JsonFormat::new()
        .indent(indent)
        .map_err(map_json_syntax_error("indent"))?;
    let with_comma = with_indent
        .comma(item_separator)
        .map_err(map_json_syntax_error("separators (item)"))?;
    let with_colon = with_comma
        .colon(key_separator)
        .map_err(map_json_syntax_error("separators (key)"))?;
    Ok(with_colon.ascii(ensure_ascii))
}
/// Build a converter from `JsonSyntaxError` to a MiniJinja `InvalidOperation`
/// error whose message names the rejected `tojson` option `field`.
fn map_json_syntax_error(
    field: &'static str,
) -> impl FnOnce(JsonSyntaxError) -> MinijinjaError + Copy {
    move |syntax_error| {
        let message = format!("invalid {field} value for tojson: {syntax_error}");
        MinijinjaError::new(ErrorKind::InvalidOperation, message)
    }
}
/// Return a deep copy of `value` in which every object has its keys inserted
/// in sorted order; arrays keep their element order and scalars are cloned.
fn sort_json_keys(value: &JsonValue) -> JsonValue {
    match value {
        JsonValue::Array(items) => {
            let mut sorted_items = Vec::with_capacity(items.len());
            for item in items {
                sorted_items.push(sort_json_keys(item));
            }
            JsonValue::Array(sorted_items)
        }
        JsonValue::Object(map) => {
            let mut entries: Vec<(&String, &JsonValue)> = map.iter().collect();
            entries.sort_by(|left, right| left.0.cmp(right.0));
            let mut sorted: serde_json::Map<String, JsonValue> = serde_json::Map::new();
            for (key, child) in entries {
                sorted.insert(key.clone(), sort_json_keys(child));
            }
            JsonValue::Object(sorted)
        }
        scalar => scalar.clone(),
    }
}
#[cfg(test)]
mod tests {
use expect_test::expect;
use minijinja::Environment;
use serde_json::json;
use thiserror_ext::AsReport;
use super::hf_tojson_filter;
/// Render `template` with `payload` bound to the `payload` variable, using an
/// environment whose `tojson` filter is `hf_tojson_filter`. Panics on error.
fn render(template: &str, payload: serde_json::Value) -> String {
    let mut environment = Environment::new();
    environment.add_filter("tojson", hf_tojson_filter);
    let context = json!({ "payload": payload });
    environment.render_str(template, context).unwrap()
}
/// Like `render`, but expects rendering to fail and returns the error.
/// Panics if rendering succeeds.
fn render_error(template: &str, payload: serde_json::Value) -> minijinja::Error {
    let mut environment = Environment::new();
    environment.add_filter("tojson", hf_tojson_filter);
    let context = json!({ "payload": payload });
    environment.render_str(template, context).unwrap_err()
}
#[test]
fn tojson_does_not_html_escape_like_minijinja_builtin() {
    // `<`, `>`, `&` and `'` must pass through untouched.
    let payload = json!("<tag>&'");
    let output = render("{{ payload|tojson }}", payload);
    assert_eq!(output, "\"<tag>&'\"");
}
#[test]
fn tojson_supports_sort_keys_recursively() {
    // Keys are sorted at the top level and inside nested objects.
    let payload = json!({
        "z": {"b": 1, "a": 2},
        "a": 0
    });
    let output = render("{{ payload|tojson(sort_keys=true) }}", payload);
    assert_eq!(output, "{\"a\": 0, \"z\": {\"a\": 2, \"b\": 1}}");
}
#[test]
fn tojson_supports_indent() {
    // An integer `indent` of 2 must yield two spaces per nesting level, as
    // Python's `json.dumps(indent=2)` does.
    let rendered = render("{{ payload|tojson(indent=2) }}", json!([1, 2]));
    assert_eq!(rendered, "[\n  1,\n  2\n]");
}
#[test]
fn tojson_supports_ensure_ascii_false() {
    // Non-ASCII characters are emitted verbatim.
    let payload = json!("中文");
    let output = render("{{ payload|tojson(ensure_ascii=false) }}", payload);
    assert_eq!(output, "\"中文\"");
}
#[test]
fn tojson_supports_ensure_ascii_true() {
    // Non-ASCII characters are emitted as `\uXXXX` escapes.
    let payload = json!("中文");
    let output = render("{{ payload|tojson(ensure_ascii=true) }}", payload);
    assert_eq!(output, "\"\\u4e2d\\u6587\"");
}
#[test]
fn tojson_supports_separators() {
    // Compact separators remove all optional whitespace.
    let payload = json!({
        "x": [1, 2]
    });
    let output = render("{{ payload|tojson(separators=[',', ':']) }}", payload);
    assert_eq!(output, "{\"x\":[1,2]}");
}
#[test]
fn tojson_supports_negative_indent_as_newline_only() {
    // A negative indent still breaks lines but adds no leading spaces.
    let payload = json!([1, 2]);
    let output = render("{{ payload|tojson(indent=-1) }}", payload);
    assert_eq!(output, "[\n1,\n2\n]");
}
#[test]
fn tojson_supports_string_indent() {
    // A string indent is used verbatim as the per-level prefix.
    let payload = json!([1, 2]);
    let output = render("{{ payload|tojson(indent=' ') }}", payload);
    assert_eq!(output, "[\n 1,\n 2\n]");
}
#[test]
fn tojson_supports_boolean_indent() {
    // `true` behaves like a one-space indent.
    let with_true = render("{{ payload|tojson(indent=true) }}", json!([1, 2]));
    assert_eq!(with_true, "[\n 1,\n 2\n]");

    // `false` behaves like a zero-width indent: newlines only.
    let with_false = render("{{ payload|tojson(indent=false) }}", json!([1, 2]));
    assert_eq!(with_false, "[\n1,\n2\n]");
}
#[test]
fn tojson_combines_indent_sort_keys_separators_and_ensure_ascii() {
    // All four options together: sorted keys, compact separators, ASCII
    // escaping, and a one-space string indent.
    let rendered = render(
        "{{ payload|tojson(ensure_ascii=true, sort_keys=true, separators=[',', ':'], indent=' ') }}",
        json!({
            "b": "<中>",
            "a": [1, 2]
        }),
    );
    // The indent string is repeated once per nesting level, so the array
    // elements (depth 2) are prefixed with two spaces.
    assert_eq!(
        rendered,
        "{\n \"a\":[\n  1,\n  2\n ],\n \"b\":\"<\\u4e2d>\"\n}"
    );
}
#[test]
fn tojson_rejects_invalid_indent() {
    // An indent string with non-whitespace characters is rejected.
    let failure = render_error("{{ payload|tojson(indent='-->') }}", json!({"a": 1}));
    let report = failure.to_report_string();
    expect!["invalid operation: invalid indent value for tojson: string contains unexpected character '-' (in <string>:1)"]
        .assert_eq(&report);
}
#[test]
fn tojson_rejects_invalid_separator_shape() {
    // `separators` must be a two-element sequence, not a plain string.
    let failure = render_error("{{ payload|tojson(separators=':,') }}", json!({"a": 1}));
    let report = failure.to_report_string();
    expect!["cannot deserialize: invalid type: string \":,\", expected a tuple of size 2 (in <string>:1)"]
        .assert_eq(&report);
}
#[test]
fn tojson_rejects_invalid_key_separator() {
    // A key separator that is not valid JSON syntax is rejected.
    let template = "{{ payload|tojson(separators=[',', '=>']) }}";
    let failure = render_error(template, json!({"a": 1}));
    let report = failure.to_report_string();
    expect!["invalid operation: invalid separators (key) value for tojson: string contains unexpected character '=' (in <string>:1)"]
        .assert_eq(&report);
}
}
+31
View File
@@ -0,0 +1,31 @@
use std::sync::Arc;
use vllm_text::Prompt;
use crate::error::Result;
use crate::request::ChatRequest;
pub mod deepseek_v32;
pub mod deepseek_v4;
pub mod hf;
mod selection;
pub use deepseek_v4::DeepSeekV4ChatRenderer;
pub use deepseek_v32::DeepSeekV32ChatRenderer;
pub use selection::RendererSelection;
/// Rendered chat prompt submitted to the text backend.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RenderedPrompt {
/// The prompt produced by a [`ChatRenderer`] for one chat request.
pub prompt: Prompt,
}
/// Minimal chat-prompt renderer used by `vllm-chat`.
///
/// Implementations must be `Send + Sync`, which lets a renderer be shared
/// behind an `Arc` across threads.
pub trait ChatRenderer: Send + Sync {
/// Render one chat request into the text prompt submitted to the text
/// backend.
///
/// Returns an error when the request cannot be rendered.
fn render(&self, request: &ChatRequest) -> Result<RenderedPrompt>;
}
/// Shared trait-object form of [`ChatRenderer`]: a reference-counted,
/// dynamically dispatched renderer.
pub type DynChatRenderer = Arc<dyn ChatRenderer>;
+109
View File
@@ -0,0 +1,109 @@
use std::fmt;
use std::str::FromStr;
use serde_with::DeserializeFromStr;
/// Specify which chat renderer implementation to use.
///
/// Deserialization goes through the type's `FromStr` implementation
/// (`DeserializeFromStr`); the default value is [`RendererSelection::Auto`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, DeserializeFromStr)]
pub enum RendererSelection {
/// Use model-based auto-detection.
#[default]
Auto,
/// Force the generic Hugging Face chat-template renderer.
Hf,
/// Force the DeepSeek V3.2 renderer.
DeepSeekV32,
/// Force the DeepSeek V4 renderer.
DeepSeekV4,
}
impl RendererSelection {
    /// Textual form of [`RendererSelection::Auto`].
    pub const AUTO_LITERAL: &str = "auto";
    /// Textual form of [`RendererSelection::DeepSeekV32`]; also the model
    /// type that auto-detection maps to it.
    pub const DEEPSEEK_V32_LITERAL: &str = "deepseek_v32";
    /// Textual form of [`RendererSelection::DeepSeekV4`]; also the model
    /// type that auto-detection maps to it.
    pub const DEEPSEEK_V4_LITERAL: &str = "deepseek_v4";
    /// Textual form of [`RendererSelection::Hf`].
    pub const HF_LITERAL: &str = "hf";

    /// Resolve `Auto` into a concrete renderer based on `model_type`.
    ///
    /// An explicit selection is returned unchanged. For `Auto`, the two
    /// DeepSeek model types map to their dedicated renderers and every other
    /// model type falls back to `Hf`.
    pub fn resolve(self, model_type: &str) -> Self {
        if !matches!(self, Self::Auto) {
            return self;
        }
        match model_type {
            Self::DEEPSEEK_V32_LITERAL => Self::DeepSeekV32,
            Self::DEEPSEEK_V4_LITERAL => Self::DeepSeekV4,
            _ => Self::Hf,
        }
    }
}
impl FromStr for RendererSelection {
    type Err = String;

    /// Parse a renderer name, ignoring ASCII case. Unknown names produce an
    /// error message listing the accepted values.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let known = [
            (Self::AUTO_LITERAL, Self::Auto),
            (Self::HF_LITERAL, Self::Hf),
            (Self::DEEPSEEK_V32_LITERAL, Self::DeepSeekV32),
            (Self::DEEPSEEK_V4_LITERAL, Self::DeepSeekV4),
        ];
        for (literal, selection) in known {
            if value.eq_ignore_ascii_case(literal) {
                return Ok(selection);
            }
        }
        Err(format!(
            "unknown renderer `{value}` (expected one of: auto, hf, deepseek_v32, deepseek_v4)"
        ))
    }
}
impl fmt::Display for RendererSelection {
    /// Write the literal that `FromStr` accepts for this selection.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let literal = match self {
            Self::Auto => Self::AUTO_LITERAL,
            Self::Hf => Self::HF_LITERAL,
            Self::DeepSeekV32 => Self::DEEPSEEK_V32_LITERAL,
            Self::DeepSeekV4 => Self::DEEPSEEK_V4_LITERAL,
        };
        f.write_str(literal)
    }
}
#[cfg(test)]
mod tests {
use super::RendererSelection;
#[test]
fn renderer_selection_parses_known_values() {
    // Every documented literal parses to its variant.
    let cases = [
        ("auto", RendererSelection::Auto),
        ("hf", RendererSelection::Hf),
        ("deepseek_v32", RendererSelection::DeepSeekV32),
        ("deepseek_v4", RendererSelection::DeepSeekV4),
    ];
    for (literal, expected) in cases {
        let parsed = literal.parse::<RendererSelection>().unwrap();
        assert_eq!(parsed, expected);
    }
}
#[test]
fn renderer_selection_display_round_trips() {
    // `Display` output must parse back to the same variant.
    let all_variants = [
        RendererSelection::Auto,
        RendererSelection::Hf,
        RendererSelection::DeepSeekV32,
        RendererSelection::DeepSeekV4,
    ];
    for original in all_variants {
        let displayed = original.to_string();
        let reparsed = displayed.parse::<RendererSelection>().unwrap();
        assert_eq!(reparsed, original);
    }
}
}
+662
View File
@@ -0,0 +1,662 @@
use std::collections::HashMap;
use llm_multimodal::ImageDetail;
use serde::{Deserialize, Serialize};
use serde_json::Value;
pub use vllm_text::SamplingParams;
use vllm_text::TextDecodeOptions;
pub use vllm_tool_parser::Tool as ChatTool;
use crate::AssistantMessageExt;
use crate::error::{Error, Result};
use crate::event::{AssistantContentBlock, AssistantMessage};
/// Role label for one chat message.
///
/// Serialized and deserialized in `snake_case` (for example `tool_response`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChatRole {
System,
Developer,
User,
Assistant,
ToolResponse,
}
/// One chat content part in OpenAI-style block format: either plain text or
/// an image reference.
///
/// Serialized with an internal `type` tag in `snake_case` (`text`,
/// `image_url`); `None` fields are omitted from the output.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ChatContentPart {
/// One plain-text content block.
Text { text: String },
/// One image URL/data URL content block.
ImageUrl {
/// The image URL or data URL.
image_url: String,
/// Optional image detail setting.
detail: Option<ImageDetail>,
/// Optional identifier for the image.
// NOTE(review): how `uuid` is consumed is not visible here — confirm.
uuid: Option<String>,
},
// ImageData...
// ImageEmbeds...
}
impl ChatContentPart {
    /// Build a text part from anything convertible into a `String`.
    pub fn text(text: impl Into<String>) -> Self {
        let text = text.into();
        Self::Text { text }
    }

    /// Build an image URL part from the given URL string, leaving `detail`
    /// and `uuid` unset.
    pub fn image_url(image_url: impl Into<String>) -> Self {
        let image_url = image_url.into();
        Self::ImageUrl {
            image_url,
            detail: None,
            uuid: None,
        }
    }

    /// Borrow the text of a text part.
    ///
    /// Any other kind of part yields an "unsupported multimodal content"
    /// error that names the offending part type.
    pub(crate) fn as_text(&self) -> Result<&str> {
        match self {
            Self::ImageUrl { .. } => Err(Error::UnsupportedMultimodalContent("image_url")),
            Self::Text { text } => Ok(text.as_str()),
        }
    }

    /// Report whether this is a text part whose text is the empty string.
    pub(crate) fn is_empty_text(&self) -> bool {
        match self {
            Self::Text { text } => text.is_empty(),
            Self::ImageUrl { .. } => false,
        }
    }

    /// Report whether this part carries multimodal (non-text) content.
    pub(crate) fn is_multimodal(&self) -> bool {
        match self {
            Self::ImageUrl { .. } => true,
            Self::Text { .. } => false,
        }
    }
}
/// Chat content of one message.
///
/// This supports either a simple string or an OpenAI-style list of content
/// blocks (see [`ChatContentPart`]). The enum is `untagged`, so no variant
/// name is (de)serialized: a JSON string maps to `Text` and a JSON array of
/// parts maps to `Parts`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ChatContent {
/// Simple text content.
Text(String),
/// OpenAI-style blocks.
Parts(Vec<ChatContentPart>),
}
impl ChatContent {
    /// Concatenate the content into one plain string, inserting no
    /// separators between parts.
    ///
    /// Fails with the error produced by `ChatContentPart::as_text` as soon as
    /// a non-text part (for example an image URL) is encountered.
    pub fn try_flatten_to_text(&self) -> Result<String> {
        match self {
            Self::Text(text) => Ok(text.clone()),
            Self::Parts(parts) => {
                let mut flattened = String::new();
                for part in parts {
                    flattened.push_str(part.as_text()?);
                }
                Ok(flattened)
            }
        }
    }

    /// Report whether the content is an empty string, or a list in which
    /// every part is an empty text block (an empty list counts as empty).
    pub fn is_empty(&self) -> bool {
        match self {
            Self::Parts(parts) => parts.iter().all(|part| part.is_empty_text()),
            Self::Text(text) => text.is_empty(),
        }
    }

    /// Report whether at least one part is multimodal. Plain string content
    /// never is.
    pub fn has_multimodal(&self) -> bool {
        match self {
            Self::Parts(parts) => parts.iter().any(|part| part.is_multimodal()),
            Self::Text(_) => false,
        }
    }
}
impl From<String> for ChatContent {
fn from(value: String) -> Self {
Self::Text(value)
}
}
impl From<&str> for ChatContent {
fn from(value: &str) -> Self {
Self::Text(value.to_string())
}
}
impl From<Vec<ChatContentPart>> for ChatContent {
fn from(value: Vec<ChatContentPart>) -> Self {
Self::Parts(value)
}
}
/// One chat message.
///
/// Messages are internally tagged: the `role` field carries the `snake_case`
/// variant name (for example `"tool_response"` for `ToolResponse`).
///
/// Original Python API reference:
/// <https://github.com/vllm-project/vllm/blob/bc2c0c86efb28e77677a3cfb8687e976914a313a/vllm/entrypoints/chat_utils.py#L309-L333>
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "role", rename_all = "snake_case")]
pub enum ChatMessage {
/// System message content.
System { content: ChatContent },
/// Developer message content plus optional message-local tools.
Developer {
content: ChatContent,
tools: Option<Vec<ChatTool>>,
},
/// User message content.
User { content: ChatContent },
/// Assistant history content assembled from structured assistant blocks.
///
/// Unlike the other variants, this holds assistant blocks rather than
/// [`ChatContent`].
Assistant { content: Vec<AssistantContentBlock> },
/// Tool response content associated with one prior assistant tool call.
ToolResponse {
content: ChatContent,
tool_call_id: String,
},
}
impl ChatMessage {
/// Construct one chat message with plain string content.
pub fn text(role: ChatRole, text: impl Into<String>) -> Self {
let content: String = text.into();
match role {
ChatRole::System => Self::system(content),
ChatRole::Developer => Self::developer(content, None),
ChatRole::User => Self::user(content),
ChatRole::Assistant => Self::assistant_text(content),
ChatRole::ToolResponse => {
panic!(
"tool response messages require a tool_call_id; \
use ChatMessage::tool_response() instead"
)
}
}
}
/// Construct one chat message with system role.
pub fn system(content: impl Into<ChatContent>) -> Self {
Self::System {
content: content.into(),
}
}
/// Construct one chat message with developer role.
pub fn developer(content: impl Into<ChatContent>, tools: Option<Vec<ChatTool>>) -> Self {
Self::Developer {
content: content.into(),
tools,
}
}
/// Construct one chat message with user role.
pub fn user(content: impl Into<ChatContent>) -> Self {
Self::User {
content: content.into(),
}
}
/// Construct one chat message with assistant role and plain string content.
pub fn assistant_text(text: impl Into<String>) -> Self {
Self::Assistant {
content: vec![AssistantContentBlock::Text { text: text.into() }],
}
}
/// Construct one chat message with assistant role and structured content
/// blocks.
pub fn assistant_blocks(content: Vec<AssistantContentBlock>) -> Self {
Self::Assistant { content }
}
/// Construct one tool-role message.
pub fn tool_response(content: impl Into<ChatContent>, tool_call_id: impl Into<String>) -> Self {
Self::ToolResponse {
content: content.into(),
tool_call_id: tool_call_id.into(),
}
}
/// Return the chat role of this message.
pub fn role(&self) -> ChatRole {
match self {
Self::System { .. } => ChatRole::System,
Self::Developer { .. } => ChatRole::Developer,
Self::User { .. } => ChatRole::User,
Self::Assistant { .. } => ChatRole::Assistant,
Self::ToolResponse { .. } => ChatRole::ToolResponse,
}
}
/// Concatenate the visible text carried by this message.
pub fn text_content(&self) -> Result<String> {
match self {
Self::System { content }
| Self::Developer { content, .. }
| Self::User { content }
| Self::ToolResponse { content, .. } => content.try_flatten_to_text(),
Self::Assistant { content } => Ok(content.text()),
}
}
/// Concatenate assistant reasoning text when present.
pub fn reasoning_content(&self) -> Option<String> {
match self {
Self::Assistant { content } => content.reasoning(),
Self::System { .. }
| Self::Developer { .. }
| Self::User { .. }
| Self::ToolResponse { .. } => None,
}
}
/// Return whether this message contains any multimodal content.
pub fn has_multimodal(&self) -> bool {
match self {
Self::System { content }
| Self::Developer { content, .. }
| Self::User { content }
| Self::ToolResponse { content, .. } => content.has_multimodal(),
Self::Assistant { .. } => false,
}
}
}
impl From<AssistantMessage> for ChatMessage {
fn from(value: AssistantMessage) -> Self {
Self::Assistant {
content: value.content,
}
}
}
/// Controls how prompt rendering should end after the existing chat history.
///
/// Variants are (de)serialized under their `snake_case` names, and the
/// `Default` value is `StartNewAssistant`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GenerationPromptMode {
/// Append a generation prompt for a new assistant turn.
///
/// Equivalent to `add_generation_prompt = true` and `continue_final_message
/// = false`.
#[default]
StartNewAssistant,
/// Leave the final assistant message open so generation continues it.
///
/// Equivalent to `add_generation_prompt = false` and
/// `continue_final_message = true`.
ContinueFinalAssistant,
/// Render the existing chat history without adding any trailing generation
/// prompt.
///
/// Equivalent to `add_generation_prompt = false` and
/// `continue_final_message = false`.
NoGenerationPrompt,
}
/// Effort level for reasoning models.
///
/// Variants are (de)serialized as their all-lowercase names (see the
/// `rename_all` attribute below), so `XHigh` is spelled `"xhigh"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
None,
Minimal,
Low,
Medium,
High,
XHigh,
Max,
}
impl ReasoningEffort {
/// Return the all-lowercase string label of this effort level
/// (for example `"xhigh"` for `XHigh`).
pub fn as_str(self) -> &'static str {
match self {
Self::None => "none",
Self::Minimal => "minimal",
Self::Low => "low",
Self::Medium => "medium",
Self::High => "high",
Self::XHigh => "xhigh",
Self::Max => "max",
}
}
}
/// Chat-template-related request options.
///
/// These are the small subset of chat controls that currently affect prompt
/// rendering in `vllm-chat`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChatOptions {
/// Controls whether rendering starts a new assistant turn, continues the
/// final assistant message, or emits no trailing generation prompt at
/// all.
pub generation_prompt_mode: GenerationPromptMode,
/// Per-request Jinja chat template override. When set, this template is
/// used instead of the model's default chat template.
pub chat_template: Option<String>,
/// Effort level exposed to chat templates for reasoning models.
pub reasoning_effort: Option<ReasoningEffort>,
/// Additional keyword arguments exposed to the chat template.
///
/// The keys `thinking` and `enable_thinking` are additionally read by
/// `ChatRequest::enable_thinking`, which requires them to be booleans.
pub template_kwargs: HashMap<String, Value>,
}
/// Default options: start a new assistant turn, no template override, no
/// reasoning effort, and no extra template kwargs.
impl Default for ChatOptions {
    fn default() -> Self {
        let generation_prompt_mode = GenerationPromptMode::StartNewAssistant;
        let template_kwargs = HashMap::new();
        Self {
            generation_prompt_mode,
            chat_template: None,
            reasoning_effort: None,
            template_kwargs,
        }
    }
}
impl ChatOptions {
    /// Report whether rendering should append a generation prompt that opens
    /// a new assistant turn, i.e. the mode is `StartNewAssistant`.
    pub fn add_generation_prompt(&self) -> bool {
        self.generation_prompt_mode == GenerationPromptMode::StartNewAssistant
    }

    /// Report whether rendering should leave the final assistant message open
    /// for continuation, i.e. the mode is `ContinueFinalAssistant`.
    pub fn continue_final_message(&self) -> bool {
        self.generation_prompt_mode == GenerationPromptMode::ContinueFinalAssistant
    }
}
/// Tool-choice semantics supported by `vllm-chat`.
///
/// Variants are (de)serialized under their `snake_case` names; the `Default`
/// value is `None`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChatToolChoice {
/// Tool parsing is enabled for the request as long as its tool list is
/// non-empty (see `ChatRequest::tool_parsing_enabled`).
Auto,
/// Tool parsing stays disabled for the request.
#[default]
None,
}
/// One chat request ready to be rendered into a prompt and lowered into a
/// generate request.
///
/// Basic invariants (non-empty history, final assistant message when
/// continuing) are checked by `ChatRequest::validate`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChatRequest {
/// Stable caller-supplied request ID.
pub request_id: String,
/// Ordered chat history to render.
pub messages: Vec<ChatMessage>,
/// User-facing sampling parameters accepted by `vllm-chat`.
pub sampling_params: SamplingParams,
/// Chat-specific rendering options.
pub chat_options: ChatOptions,
/// Function tools made available to the model for this request.
pub tools: Vec<ChatTool>,
/// Tool-choice behavior for this request.
pub tool_choice: ChatToolChoice,
/// Text decode options for incremental detokenization.
pub decode_options: TextDecodeOptions,
/// Whether to emit intermediate northbound content deltas before the
/// terminal result.
///
/// If `false`, callers only observe the terminal accumulated assistant
/// output. If `true`, callers may receive zero or more incremental
/// content events before the final terminal one.
pub intermediate: bool,
/// Request scheduling priority (lower means earlier handling; default 0).
pub priority: i32,
/// Documents for RAG (retrieval-augmented generation), passed to the chat
/// template.
pub documents: Option<Vec<Value>>,
/// Salt for prefix cache isolation in multi-user environments.
pub cache_salt: Option<String>,
/// Whether to add special tokens (e.g. BOS) during prompt tokenization.
pub add_special_tokens: bool,
/// Override data parallel rank.
///
/// `#[serde(default)]` lets this field be absent from serialized input, in
/// which case it deserializes to `None`.
#[serde(default)]
pub data_parallel_rank: Option<u32>,
}
impl ChatRequest {
/// Return one minimal valid request fixture for tests.
pub fn for_test() -> Self {
Self {
request_id: "test-request".to_string(),
messages: vec![ChatMessage::text(ChatRole::User, "test")],
sampling_params: SamplingParams::default(),
chat_options: ChatOptions::default(),
tools: Vec::new(),
tool_choice: ChatToolChoice::None,
decode_options: TextDecodeOptions::default(),
intermediate: true,
priority: 0,
documents: None,
cache_salt: None,
add_special_tokens: false,
data_parallel_rank: None,
}
}
/// Validate basic request invariants before rendering.
pub fn validate(&self) -> Result<()> {
if self.messages.is_empty() {
return Err(Error::EmptyMessages);
}
match (
self.chat_options.generation_prompt_mode,
self.messages.last().map(ChatMessage::role),
) {
(GenerationPromptMode::ContinueFinalAssistant, Some(ChatRole::Assistant)) => {}
(GenerationPromptMode::ContinueFinalAssistant, _) => {
return Err(Error::ContinueFinalAssistantWithoutFinalAssistant);
}
(GenerationPromptMode::NoGenerationPrompt, _)
| (GenerationPromptMode::StartNewAssistant, _) => {}
}
Ok(())
}
/// Return true if this request contains any multimodal content in its
/// messages.
pub fn has_multimodal(&self) -> bool {
self.messages.iter().any(ChatMessage::has_multimodal)
}
/// Return true if this request should enable tool parsing based on the tool
/// choice and tool list.
pub(crate) fn tool_parsing_enabled(&self) -> bool {
matches!(self.tool_choice, ChatToolChoice::Auto) && !self.tools.is_empty()
}
/// Return the request-level thinking toggle when explicitly requested.
///
/// We currently accept the two request kwargs `thinking` and
/// `enable_thinking`. Both must be booleans when present. If both are
/// present, they must have the same value. If neither key is provided,
/// return `None`.
pub(crate) fn enable_thinking(&self) -> Result<Option<bool>> {
let thinking = self.parse_template_bool("thinking")?;
let enable_thinking = self.parse_template_bool("enable_thinking")?;
match (thinking, enable_thinking) {
(None, None) => Ok(None),
(Some(thinking), Some(enable_thinking)) if thinking != enable_thinking => {
Err(Error::ChatTemplate(
"template kwargs `thinking` and `enable_thinking` must match when both are set"
.to_string(),
))
}
(Some(thinking), _) => Ok(Some(thinking)),
(None, Some(enable_thinking)) => Ok(Some(enable_thinking)),
}
}
pub(crate) fn parse_template_bool(&self, key: &str) -> Result<Option<bool>> {
match self.chat_options.template_kwargs.get(key) {
None => Ok(None),
Some(Value::Bool(value)) => Ok(Some(*value)),
Some(other) => Err(Error::ChatTemplate(format!(
"template kwarg `{key}` must be a boolean, got {other}"
))),
}
}
}
impl ChatRole {
/// Return the chat-template role string used by the current text-only chat
/// backend.
///
/// The strings are the `snake_case` variant names, e.g. `"tool_response"`
/// for `ToolResponse`.
pub fn as_str(&self) -> &'static str {
match self {
Self::System => "system",
Self::Developer => "developer",
Self::User => "user",
Self::Assistant => "assistant",
Self::ToolResponse => "tool_response",
}
}
}
#[cfg(test)]
mod tests {
use serde_json::{json, to_value};
use super::{ChatContent, ChatContentPart, ChatMessage, ChatRequest, ChatRole, ChatTool};
use crate::Error;
use crate::event::AssistantContentBlock;
/// A bare JSON string deserializes into `ChatContent::Text`.
#[test]
fn chat_content_deserializes_from_raw_string() {
let content: ChatContent = serde_json::from_value(json!("hello")).unwrap();
assert_eq!(content, ChatContent::Text("hello".to_string()));
}
/// A JSON array of `{"type": "text", ...}` objects deserializes into
/// `ChatContent::Parts`.
#[test]
fn chat_content_deserializes_from_openai_text_blocks() {
let content: ChatContent =
serde_json::from_value(json!([{ "type": "text", "text": "hello" }])).unwrap();
assert_eq!(
content,
ChatContent::Parts(vec![ChatContentPart::text("hello")])
);
}
/// Both `&str` and `String` convert into `ChatContent::Text`.
#[test]
fn chat_content_from_string_like_values_builds_text() {
assert_eq!(
ChatContent::from("hello"),
ChatContent::Text("hello".to_string())
);
assert_eq!(
ChatContent::from("hello".to_string()),
ChatContent::Text("hello".to_string())
);
}
/// Flattening text parts concatenates them verbatim, with no separator.
#[test]
fn chat_content_try_flattens_text_parts_without_separators() {
let content = ChatContent::Parts(vec![
ChatContentPart::text("hello"),
ChatContentPart::text(" world"),
]);
assert_eq!(content.try_flatten_to_text().unwrap(), "hello world");
}
/// For an assistant message, visible text and reasoning text are reported
/// separately.
#[test]
fn assistant_message_collects_visible_and_reasoning_text() {
let message = ChatMessage::assistant_blocks(vec![
AssistantContentBlock::Reasoning {
text: "inner".to_string(),
},
AssistantContentBlock::Text {
text: "outer".to_string(),
},
]);
assert_eq!(message.role(), ChatRole::Assistant);
assert_eq!(message.text_content().unwrap(), "outer");
assert_eq!(message.reasoning_content().as_deref(), Some("inner"));
}
/// A developer message with message-local tools survives a serialize /
/// deserialize round trip unchanged.
#[test]
fn developer_message_round_trips_through_serde() {
let message = ChatMessage::developer(
"hello",
Some(vec![ChatTool {
name: "get_weather".to_string(),
description: Some("Get weather".to_string()),
parameters: json!({
"type": "object",
"properties": {"city": {"type": "string"}},
}),
strict: Some(true),
}]),
);
let value = to_value(&message).unwrap();
let decoded: ChatMessage = serde_json::from_value(value).unwrap();
assert_eq!(decoded, message);
}
/// Without `thinking` / `enable_thinking` kwargs the toggle is `None`.
#[test]
fn enable_thinking_is_none_when_no_kwargs_are_present() {
let request = ChatRequest::for_test();
assert_eq!(request.enable_thinking().unwrap(), None);
}
/// Setting both kwargs to the same boolean is accepted and yields that
/// value.
#[test]
fn enable_thinking_accepts_matching_duplicate_kwargs() {
let mut request = ChatRequest::for_test();
request.chat_options.template_kwargs.insert("thinking".to_string(), json!(true));
request
.chat_options
.template_kwargs
.insert("enable_thinking".to_string(), json!(true));
assert_eq!(request.enable_thinking().unwrap(), Some(true));
}
/// A non-boolean kwarg value is rejected with a chat-template error.
#[test]
fn enable_thinking_rejects_non_boolean_kwargs() {
let mut request = ChatRequest::for_test();
request
.chat_options
.template_kwargs
.insert("thinking".to_string(), json!("yes"));
assert!(matches!(
request.enable_thinking(),
Err(Error::ChatTemplate(message))
if message.contains("`thinking` must be a boolean")
));
}
/// Setting the two kwargs to different booleans is rejected with a
/// chat-template error.
#[test]
fn enable_thinking_rejects_conflicting_duplicate_kwargs() {
let mut request = ChatRequest::for_test();
request
.chat_options
.template_kwargs
.insert("thinking".to_string(), json!(false));
request
.chat_options
.template_kwargs
.insert("enable_thinking".to_string(), json!(true));
assert!(matches!(
request.enable_thinking(),
Err(Error::ChatTemplate(message))
if message.contains("`thinking` and `enable_thinking` must match")
));
}
}
+237
View File
@@ -0,0 +1,237 @@
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use futures::Stream;
use trait_set::trait_set;
use vllm_text::{DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs};
use crate::FinishReason;
use crate::error::{Error, Result};
use crate::event::{AssistantContentBlock, AssistantMessage, ChatEvent};
/// Final structured assistant message plus terminal stream metadata.
///
/// The field notes below describe how `ChatEventStream::collect_message`
/// fills this struct.
#[derive(Debug, Clone, PartialEq)]
pub struct CollectedAssistantMessage {
/// Assistant message carried by the terminal `ChatEvent::Done` event.
pub message: AssistantMessage,
/// Prompt token count reported by the `Done` event.
pub prompt_token_count: usize,
/// Prompt token IDs reported by the `Start` event (empty if no `Start`
/// event was seen).
pub prompt_token_ids: Arc<[u32]>,
/// Prompt logprobs reported by the `Start` event, if any.
pub prompt_logprobs: Option<DecodedPromptLogprobs>,
/// Logprob positions concatenated from all `LogprobsDelta` events; `None`
/// when no position was collected.
pub logprobs: Option<DecodedLogprobs>,
/// Token IDs concatenated from all `LogprobsDelta` events.
pub token_ids: Vec<u32>,
/// Output token count reported by the `Done` event.
pub output_token_count: usize,
/// Finish reason reported by the `Done` event.
pub finish_reason: FinishReason,
/// Connector-specific KV transfer parameters for disaggregated serving.
pub kv_transfer_params: Option<serde_json::Value>,
}
/// Per-request stream of chat events.
///
/// Wraps a boxed, pinned inner stream together with the ID of the request it
/// belongs to, and implements `Stream` by delegating to that inner stream.
pub struct ChatEventStream {
// ID of the request this stream belongs to; exposed via `request_id()`.
request_id: String,
// Type-erased source of events; every poll is forwarded to it.
inner: Pin<Box<dyn Stream<Item = Result<ChatEvent>> + Send>>,
}
impl ChatEventStream {
pub(crate) fn new(request_id: String, inner: impl crate::output::ChatEventStream) -> Self {
Self {
request_id,
inner: Box::pin(inner),
}
}
/// Return the request ID associated with this stream.
pub fn request_id(&self) -> &str {
&self.request_id
}
/// Collect the stream to completion and return the final assembled
/// assistant message.
pub async fn collect_message(mut self) -> Result<CollectedAssistantMessage> {
use futures::StreamExt as _;
let mut message = AssistantMessage::default();
let mut prompt_logprobs = None;
let mut prompt_token_ids: Arc<[u32]> = Arc::from([]);
let mut logprob_positions: Vec<DecodedPositionLogprobs> = Vec::new();
let mut token_ids: Vec<u32> = Vec::new();
while let Some(event) = self.next().await.transpose()? {
match event {
ChatEvent::Start {
prompt_logprobs: start_prompt_logprobs,
prompt_token_ids: start_prompt_token_ids,
} => {
prompt_logprobs = start_prompt_logprobs;
prompt_token_ids = start_prompt_token_ids;
}
ChatEvent::BlockEnd { block, .. } => message.push_block(block),
ChatEvent::LogprobsDelta {
logprobs,
token_ids: delta_ids,
} => {
if let Some(logprobs) = logprobs {
logprob_positions.extend(logprobs.positions);
}
token_ids.extend(delta_ids);
}
ChatEvent::Done {
message: done,
prompt_token_count,
output_token_count,
finish_reason,
kv_transfer_params,
} => {
return Ok(CollectedAssistantMessage {
message: done,
prompt_token_count,
prompt_token_ids,
prompt_logprobs,
logprobs: (!logprob_positions.is_empty()).then_some(DecodedLogprobs {
positions: logprob_positions,
}),
token_ids,
output_token_count,
finish_reason,
kv_transfer_params,
});
}
ChatEvent::ToolCallEnd { call, .. } => {
message.push_block(AssistantContentBlock::ToolCall(call));
}
ChatEvent::BlockStart { .. }
| ChatEvent::BlockDelta { .. }
| ChatEvent::ToolCallStart { .. }
| ChatEvent::ToolCallArgumentsDelta { .. } => {}
}
}
// Note: this is actually unreachable, as the underlying stream always emit an
// error on unexpected close.
Err(Error::StreamClosedBeforeTerminalOutput {
request_id: self.request_id,
})
}
}
/// Forwards every poll to the wrapped inner stream.
impl Stream for ChatEventStream {
    type Item = Result<ChatEvent>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // `inner` is already a `Pin<Box<_>>`, so it can be re-pinned directly.
        let inner = self.inner.as_mut();
        inner.poll_next(cx)
    }
}
// Trait alias (declared through the `trait_set!` macro) for any
// `Send + 'static` stream that yields `Result<ChatEvent>` items.
trait_set! {
pub trait ChatEventStreamTrait = Stream<Item = Result<ChatEvent>> + Send + 'static;
}
#[cfg(test)]
mod tests {
use futures::stream;
use vllm_llm::FinishReason;
use vllm_text::{
DecodedLogprobs, DecodedPositionLogprobs, DecodedPromptLogprobs, DecodedTokenLogprob,
};
use super::{ChatEventStream, CollectedAssistantMessage};
use crate::error::Error;
use crate::event::ChatEvent;
/// A stream that ends after `Start` without a `Done` event must fail with
/// `StreamClosedBeforeTerminalOutput` carrying the request ID.
#[tokio::test]
async fn collect_message_requires_terminal_done_event() {
let stream = ChatEventStream::new(
"chat-missing-done".to_string(),
stream::iter([Ok(ChatEvent::Start {
prompt_token_ids: vec![].into(),
prompt_logprobs: None,
})]),
);
let error = stream.collect_message().await.expect_err("missing done");
assert!(matches!(
error,
Error::StreamClosedBeforeTerminalOutput { request_id }
if request_id == "chat-missing-done"
));
}
/// Prompt logprobs from `Start` and sample logprobs from `LogprobsDelta`
/// must both be present in the collected result.
#[tokio::test]
async fn collect_message_retains_prompt_and_sample_logprobs() {
let stream = ChatEventStream::new(
"chat-logprobs".to_string(),
stream::iter(vec![
Ok(ChatEvent::Start {
prompt_token_ids: vec![10, 11].into(),
prompt_logprobs: Some(DecodedPromptLogprobs {
first_token_id: 0,
first_token: "o".to_string(),
scored_positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "p".to_string(),
logprob: -0.1,
rank: 1,
}],
}],
}),
}),
Ok(ChatEvent::LogprobsDelta {
logprobs: Some(DecodedLogprobs {
positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "a".to_string(),
logprob: -0.2,
rank: 1,
}],
}],
}),
token_ids: vec![],
}),
Ok(ChatEvent::Done {
message: Default::default(),
prompt_token_count: 2,
output_token_count: 1,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
}),
]),
);
let collected = stream.collect_message().await.unwrap();
// The expected value mirrors the events fed into the stream above.
assert_eq!(
collected,
CollectedAssistantMessage {
message: Default::default(),
prompt_token_count: 2,
prompt_token_ids: vec![10, 11].into(),
prompt_logprobs: Some(DecodedPromptLogprobs {
first_token_id: 0,
first_token: "o".to_string(),
scored_positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "p".to_string(),
logprob: -0.1,
rank: 1,
}],
}],
}),
logprobs: Some(DecodedLogprobs {
positions: vec![DecodedPositionLogprobs {
entries: vec![DecodedTokenLogprob {
token_id: 0,
token: "a".to_string(),
logprob: -0.2,
rank: 1,
}],
}],
}),
token_ids: vec![],
output_token_count: 1,
finish_reason: FinishReason::stop_eos(),
kv_transfer_params: None,
}
);
}
}
File diff suppressed because it is too large Load Diff
+89
View File
@@ -0,0 +1,89 @@
{#- Chat template that frames every turn as <|im_start|>ROLE ... <|im_end|>.
    When tools are given, their JSON signatures are listed in the system turn.
    All comments in this file strip whitespace on both sides, so they do not
    change the rendered output. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the history backwards to find the index of the last user message
    with string content that is not a wrapped tool response. -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Render every message in order. Non-string content is rendered as empty. -#}
{%- for message in messages %}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning comes from reasoning_content when it is a string; otherwise it
    is split out of the content around the closing think tag. -#}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Only assistant turns after the last user query may show a think block. -#}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages are grouped into a single user turn. -#}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Optionally open a new assistant turn; when enable_thinking is explicitly
    false, an empty think block is pre-filled. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}
+154
View File
@@ -0,0 +1,154 @@
{#- Chat template with image/video placeholders and XML-style tool calls.
    Every turn is framed as <|im_start|>ROLE ... <|im_end|>. All comments in
    this file strip whitespace on both sides, so they do not change the
    rendered output. -#}
{%- set image_count = namespace(value=0) %}
{%- set video_count = namespace(value=0) %}
{#- render_content: renders string content as-is and list content item by
    item, replacing image/video items with vision placeholder tokens. The
    counters are bumped only when do_vision_count is true; images and videos
    are rejected when is_system_content is true. -#}
{%- macro render_content(content, do_vision_count, is_system_content=false) %}
{%- if content is string %}
{{- content }}
{%- elif content is iterable and content is not mapping %}
{%- for item in content %}
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
{%- if is_system_content %}
{{- raise_exception('System message cannot contain images.') }}
{%- endif %}
{%- if do_vision_count %}
{%- set image_count.value = image_count.value + 1 %}
{%- endif %}
{%- if add_vision_id %}
{{- 'Picture ' ~ image_count.value ~ ': ' }}
{%- endif %}
{{- '<|vision_start|><|image_pad|><|vision_end|>' }}
{%- elif 'video' in item or item.type == 'video' %}
{%- if is_system_content %}
{{- raise_exception('System message cannot contain videos.') }}
{%- endif %}
{%- if do_vision_count %}
{%- set video_count.value = video_count.value + 1 %}
{%- endif %}
{%- if add_vision_id %}
{{- 'Video ' ~ video_count.value ~ ': ' }}
{%- endif %}
{{- '<|vision_start|><|video_pad|><|vision_end|>' }}
{%- elif 'text' in item %}
{{- item.text }}
{%- else %}
{{- raise_exception('Unexpected item type in content.') }}
{%- endif %}
{%- endfor %}
{%- elif content is none or content is undefined %}
{{- '' }}
{%- else %}
{{- raise_exception('Unexpected content type.') }}
{%- endif %}
{%- endmacro %}
{%- if not messages %}
{{- raise_exception('No messages provided.') }}
{%- endif %}
{#- System turn: the tool instructions (when tools are given) followed by the
    leading system message, if there is one. -#}
{%- if tools and tools is iterable and tools is not mapping %}
{{- '<|im_start|>system\n' }}
{{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- if messages[0].role == 'system' %}
{%- set content = render_content(messages[0].content, false, true)|trim %}
{%- if content %}
{{- '\n\n' + content }}
{%- endif %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if messages[0].role == 'system' %}
{%- set content = render_content(messages[0].content, false, true)|trim %}
{{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the history backwards to find the index of the last user message that
    is not a wrapped tool response; fail if there is none. -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" %}
{%- set content = render_content(message.content, false)|trim %}
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if ns.multi_step_tool %}
{{- raise_exception('No user query found in messages.') }}
{%- endif %}
{#- Render every message in order; this pass is the one that counts vision
    items. -#}
{%- for message in messages %}
{%- set content = render_content(message.content, true)|trim %}
{%- if message.role == "system" %}
{%- if not loop.first %}
{{- raise_exception('System message must be at the beginning.') }}
{%- endif %}
{%- elif message.role == "user" %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning comes from reasoning_content when it is a string; otherwise it
    is split out of the content around the closing think tag. -#}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- set reasoning_content = reasoning_content|trim %}
{#- Only assistant turns after the last user query show a think block. -#}
{%- if loop.index0 > ns.last_query_index %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{%- if loop.first %}
{%- if content|trim %}
{{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- else %}
{{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- endif %}
{%- else %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- endif %}
{%- if tool_call.arguments is defined %}
{%- for args_name, args_value in tool_call.arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages are grouped into a single user turn. -#}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- raise_exception('Unexpected message role.') }}
{%- endif %}
{%- endfor %}
{#- Optionally open a new assistant turn. The think block is left open only
    when enable_thinking is explicitly true; otherwise it is pre-filled empty. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is true %}
{{- '<think>\n' }}
{%- else %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}
+6
View File
@@ -0,0 +1,6 @@
# vLLM Example Templates
These fixtures are copied from `vllm/examples/`.
They are currently used by `src/chat/src/renderers/hf/format.rs` tests to keep
our chat-template content format detection aligned with Python vLLM behavior.
@@ -0,0 +1,29 @@
{#- Instruction-style prompt.
    Output starts with the trimmed content of the last 'system' message ('' when there is none).
    The loop then writes a "### Instruction:", "### Response:" or "### Input:" header followed by
    the trimmed content for 'user', 'assistant' and 'user_context' messages respectively; any
    other role produces nothing inside the loop.
    A final "### Response:" header is added when add_generation_prompt is truthy and the last
    message is not from the assistant.
    NOTE(review): most tags below carry no whitespace control, so the literal line breaks
    between them presumably shape the rendered prompt - confirm before reformatting. -#}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
{% for message in messages %}
{% if message['role'] == 'user' %}
### Instruction:
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'assistant' %}
### Response:
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'user_context' %}
### Input:
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% endif %}
{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
### Response:
{% endif %}
@@ -0,0 +1,13 @@
{#- Emits the trimmed content of the last system message (or nothing when there is none),
    then every user turn prefixed with <reserved_106> and every assistant turn prefixed with
    <reserved_107>. Other roles are ignored by the loop. A bare <reserved_107> is appended when
    a generation prompt is requested and the conversation does not end with an assistant turn. -#}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
{%- for message in messages -%}
    {%- if message['role'] == 'assistant' -%}
        {{- '<reserved_107>' + message['content'] -}}
    {%- elif message['role'] == 'user' -%}
        {{- '<reserved_106>' + message['content'] -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {%- if messages[-1]['role'] != 'assistant' -%}
        {{- '<reserved_107>' -}}
    {%- endif -%}
{% endif %}
@@ -0,0 +1,18 @@
{#- Round-numbered dialogue prompt. Every user turn is written as "[Round N]\n问:<content>",
    with N starting at 0 and advancing once per user turn; every assistant turn is written as
    "\n答:<content>". An assistant turn is followed by "\n" unless it is the final message and
    no generation prompt was requested. When a generation prompt is requested and the last
    message is not an assistant turn, an open "\n答:" is appended. -#}
{%- set rounds = namespace(n=0) -%}
{%- for message in messages -%}
    {%- if message['role'] == 'user' -%}
        {{- '[Round ' + rounds.n|string + ']\n问:' + message['content'] -}}
        {%- set rounds.n = rounds.n + 1 -%}
    {%- elif message['role'] == 'assistant' -%}
        {{- '\n答:' + message['content'] -}}
        {%- if not loop.last or add_generation_prompt -%}
            {{- '\n' -}}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {%- if messages[-1]['role'] != 'assistant' -%}
        {{- '\n答:' -}}
    {%- endif -%}
{%- endif -%}
@@ -0,0 +1,18 @@
{#- Round-numbered dialogue prompt with blank-line separators. Every user turn is written as
    "[Round N]\n\n问:<content>", with N starting at 1 and advancing once per user turn; every
    assistant turn is written as "\n\n答:<content>". An assistant turn is followed by "\n\n"
    unless it is the final message and no generation prompt was requested. When a generation
    prompt is requested and the last message is not an assistant turn, an open "\n\n答:" is
    appended. -#}
{%- set rounds = namespace(n=1) -%}
{%- for message in messages -%}
    {%- if message['role'] == 'user' -%}
        {{- '[Round ' + rounds.n|string + ']\n\n问:' + message['content'] -}}
        {%- set rounds.n = rounds.n + 1 -%}
    {%- elif message['role'] == 'assistant' -%}
        {{- '\n\n答:' + message['content'] -}}
        {%- if not loop.last or add_generation_prompt -%}
            {{- '\n\n' -}}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {%- if messages[-1]['role'] != 'assistant' -%}
        {{- '\n\n答:' -}}
    {%- endif -%}
{%- endif -%}
@@ -0,0 +1,2 @@
{#- Writes each message as "<|im_start|>ROLE\nCONTENT". The closing "<|im_end|>\n" is added
    after every message except a final one rendered without a generation prompt. When a
    generation prompt is requested and the last message is not from the assistant, an open
    "<|im_start|>assistant\n" header is appended. The two template lines are kept as single
    lines because their tags carry no whitespace control. -#}
{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] }}{% if not loop.last or add_generation_prompt %}{{ '<|im_end|>\n' }}{% endif %}{% endfor %}
{% if add_generation_prompt %}{% if messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}{% endif %}
@@ -0,0 +1,15 @@
{#- Plain "User: ..." / "Assistant: ..." transcript. Messages with other roles contribute no
    text of their own, but the per-message "\n" separator is still evaluated for them. The
    separator is written after every message except a final one rendered without a generation
    prompt. An open "Assistant:" is appended when a generation prompt is requested and the
    last message is not from the assistant. -#}
{%- for message in messages -%}
    {%- if message['role'] == 'assistant' -%}
        {{- 'Assistant: ' + message['content'] -}}
    {%- elif message['role'] == 'user' -%}
        {{- 'User: ' + message['content'] -}}
    {%- endif -%}
    {%- if not loop.last or add_generation_prompt -%}
        {{- '\n' -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {%- if messages[-1]['role'] != 'assistant' -%}
        {{- 'Assistant:' -}}
    {%- endif -%}
{% endif %}
@@ -0,0 +1,17 @@
{#- Transcript with "System: ", "User: " and "Falcon: " speaker labels for system, user and
    assistant messages. Other roles contribute no text of their own, but the per-message "\n"
    separator is still evaluated for them. The separator is written after every message except
    a final one rendered without a generation prompt. An open "Falcon:" is appended when a
    generation prompt is requested and the last message is not from the assistant. -#}
{%- for message in messages -%}
    {%- if message['role'] == 'assistant' -%}
        {{- 'Falcon: ' + message['content'] -}}
    {%- elif message['role'] == 'user' -%}
        {{- 'User: ' + message['content'] -}}
    {%- elif message['role'] == 'system' -%}
        {{- 'System: ' + message['content'] -}}
    {%- endif -%}
    {%- if not loop.last or add_generation_prompt -%}
        {{- '\n' -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {%- if messages[-1]['role'] != 'assistant' -%}
        {{- 'Falcon:' -}}
    {%- endif -%}
{% endif %}
@@ -0,0 +1,30 @@
{#- Sectioned prompt with <#meta#>, <#system#> and <#chat#> headers.
    The meta section prints the trimmed content of the last 'meta-current_date' and
    'meta-task_name' messages ('' when absent); the system section prints the trimmed content
    of the last 'system' message ('' when absent).
    The chat loop writes <#user#>, <#bot#> or <#user_context#> plus the trimmed content for
    'user', 'assistant' and 'user_context' messages; other roles produce nothing in the loop.
    A final <#bot#> header is added when add_generation_prompt is truthy and the last message
    is not from the assistant.
    NOTE(review): most tags below carry no whitespace control, so the literal line breaks
    between them presumably shape the rendered prompt - confirm before reformatting. -#}
<#meta#>
- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
<#system#>
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
<#chat#>
{% for message in messages %}
{% if message['role'] == 'user' %}
<#user#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'assistant' %}
<#bot#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'user_context' %}
<#user_context#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% endif %}
{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
<#bot#>
{% endif %}
@@ -0,0 +1,12 @@
{#- Concatenates the conversation with role markers and no separators: <_system> and <_user>
    are followed by the trimmed message content, <_bot> by the assistant content as-is.
    Other roles are ignored. A bare <_bot> marker is appended whenever a generation prompt is
    requested, regardless of which role spoke last. -#}
{%- for message in messages -%}
    {%- if message['role'] == 'system' -%}
        {{- '<_system>' + message['content']|trim -}}
    {%- elif message['role'] == 'user' -%}
        {{- '<_user>' + message['content']|trim -}}
    {%- elif message['role'] == 'assistant' -%}
        {{- '<_bot>' + message['content'] -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{- '<_bot>' -}}
{%- endif %}
@@ -0,0 +1,92 @@
{#- Tool-calling chat template that renders calls as fenced JSON blocks.
    Inputs: messages, optional tools, bos_token, optional add_generation_prompt (defaults to
    false when undefined).
    Flow: (1) all system messages are joined with blank lines into ns.system_prompt;
    (2) when tools are supplied, a tool-usage instruction plus one fenced JSON dump per tool is
    appended to that prompt; (3) bos_token and the system prompt are emitted; (4) each message
    is rendered by role: user turns, assistant turns with or without tool_calls, and tool
    outputs wrapped in begin/end markers; (5) an assistant marker is appended when a generation
    prompt is requested and the conversation ends with neither a user turn nor a tool output.
    ns.is_tool tracks an open tool-output section; ns.is_output_first marks the first tool
    output after a tool call; ns.is_first marks whether the first tool call of the current
    assistant message has been written. -#}
{% if not add_generation_prompt is defined %}
{% set add_generation_prompt = false %}
{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{%- if ns.is_first_sp %}
{% set ns.system_prompt = ns.system_prompt + message['content'] %}
{% set ns.is_first_sp = false %}
{%- else %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor -%}
{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja
    The adjacent string literals below are concatenated into one instruction text; the
    'Make sure the JSON is valid.' sentence ends with a blank line so that it does not run
    directly into the '## Tools' heading. #}
{% if tools is defined and tools is not none %}
{% set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. '
'When a tool call is needed, you MUST use the following format to issue the call:\n'
'<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>FUNCTION_NAME\n'
'```json\n{"param1": "value1", "param2": "value2"}\n```<tool▁call▁end><tool▁calls▁end>\n\n'
'Make sure the JSON is valid.\n\n'
'## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
{% for tool in tools %}
{% set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
{% endfor %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
{{- bos_token }}
{{- ns.system_prompt }}
{%- for message in messages %}
{% set content = message['content'] %}
{%- if message['role'] == 'user' %}
{%- set ns.is_tool = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_last_user = true -%}
{{'<User>' + content + '<Assistant>'}}
{%- endif %}
{#- Keep only the text after the last </think> of an assistant turn. Content may be none on
    tool-call messages (the tool-call branch below checks for that), so guard the membership
    test instead of evaluating `in` against none. -#}
{%- if message['role'] == 'assistant' %}
{% if content is not none and '</think>' in content %}
{% set content = content.split('</think>')[-1] %}
{% endif %}
{% endif %}
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{- '<tool▁outputs▁end>'}}
{%- endif %}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
{%- set ns.is_output_first = true %}
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if content is none %}
{{- '<tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- else %}
{{- content + '<tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{- '\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- endif %}
{%- endfor %}
{{- '<tool▁calls▁end><end▁of▁sentence>'}}
{%- endif %}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{- '<tool▁outputs▁end>' + content + '<end▁of▁sentence>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{{- content + '<end▁of▁sentence>'}}
{%- endif %}
{%- endif %}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{%- if ns.is_output_first %}
{{- '<tool▁outputs▁begin><tool▁output▁begin>' + content + '<tool▁output▁end>'}}
{%- set ns.is_output_first = false %}
{%- else %}
{{- '\n<tool▁output▁begin>' + content + '<tool▁output▁end>'}}
{%- endif %}
{%- endif %}
{%- endfor -%}
{% if ns.is_tool %}
{{- '<tool▁outputs▁end>'}}
{%- endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
{{- '<Assistant>'}}
{%- endif %}
@@ -0,0 +1,96 @@
{#- Tool-calling chat template with a "# Tools" section.
    Inputs: messages, optional tools, bos_token, optional add_generation_prompt (defaults to
    false when undefined).
    Flow: (1) all system messages are joined with blank lines into ns.system_prompt;
    (2) bos_token and the system prompt are emitted; (3) when tools is truthy, a "# Tools"
    section lists each tool as JSON followed by the expected call format; (4) each message is
    rendered by role: user turns, assistant turns with or without tool_calls, and tool outputs
    wrapped in begin/end markers; (5) an assistant marker is appended when a generation prompt
    is requested and the conversation ends with neither a user turn nor a tool output.
    NOTE(review): the tools section emits a closing </tools> marker but no opening <tools>
    marker is written anywhere in this template - confirm whether that is intended.
    NOTE(review): many tags and expressions here carry no whitespace control, so line breaks
    between them presumably reach the rendered prompt - confirm before reformatting. -#}
{% if not add_generation_prompt is defined %}
{% set add_generation_prompt = false %}
{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{%- if ns.is_first_sp %}
{% set ns.system_prompt = ns.system_prompt + message['content'] %}
{% set ns.is_first_sp = false %}
{%- else %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor %}
{{ bos_token }}
{{ ns.system_prompt }}
{%- if tools %}
{{"\n\n# Tools\n\nYou may call one or more functions to assist with the user query." }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{"\n</tools>\n\n"}}
{{"For function call returns, you should first print <tool▁calls▁begin>"}}
{{"For each function call, you should return object like:\n" }}
{{"<tool▁call▁begin>function<tool▁sep><function_name>\n```json\n<function_arguments_in_json_format>\n```<tool▁call▁end>"}}
{{"At the end of function call returns, you should print <tool▁calls▁end><end▁of▁sentence>"}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{%- set ns.is_tool = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_last_user = true -%}
{{'<User>' + message['content'] + '<Assistant>'}}
{%- endif %}
{#- Assistant turn that carries tool calls: close any open tool-output section, then write the
    first call (prefixed by the message content when it is not none) and the remaining calls. -#}
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<tool▁outputs▁end>'}}
{%- endif %}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
{%- set ns.is_output_first = true %}
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if message['content'] is none %}
{{'<tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- else %}
{{message['content'] + '<tool▁calls▁begin><tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{'\n' + '<tool▁call▁begin>' + tool['type'] + '<tool▁sep>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<tool▁call▁end>'}}
{%- endif %}
{%- endfor %}
{{'<tool▁calls▁end><end▁of▁sentence>'}}
{%- endif %}
{#- Assistant turn without tool calls: plain content, closing a tool-output section first
    when one is open. -#}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<tool▁outputs▁end>' + message['content'] + '<end▁of▁sentence>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{% set content = message['content'] %}
{{content + '<end▁of▁sentence>'}}
{%- endif %}
{%- endif %}
{#- Tool result: the first output after a tool call opens the tool-output section, later ones
    are appended on a new line. -#}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{%- if ns.is_output_first %}
{{'<tool▁outputs▁begin><tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}
{%- set ns.is_output_first = false %}
{%- else %}
{{'\n<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}
{%- endif %}
{%- endif %}
{%- endfor -%}
{% if ns.is_tool %}
{{'<tool▁outputs▁end>'}}
{% endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
{{'<Assistant>'}}
{% endif %}
@@ -0,0 +1,91 @@
{#- Tool-calling chat template with a `thinking` switch.
    Inputs: messages, optional tools, bos_token, optional add_generation_prompt and thinking
    (both default to false when undefined).
    Flow: (1) all system messages are joined with blank lines into ns.system_prompt;
    (2) when tools are supplied, a "## Tools" section (name, description, JSON parameters per
    tool, then format rules) is appended to that prompt; (3) bos_token and the system prompt
    are emitted; (4) each message is rendered by role. The assistant marker is written lazily:
    a user turn only sets ns.is_last_user, and the following assistant turn (or the generation
    prompt) writes the marker followed by <think> or </think>.
    The text "{{additional_tool_calls}}" sits inside a string literal, so it is emitted
    verbatim rather than evaluated.
    NOTE(review): tool.function.description is concatenated directly; presumably every tool
    is expected to carry a string description - verify against callers. -#}
{% if not add_generation_prompt is defined %}
{% set add_generation_prompt = false %}
{% endif %}
{% if not thinking is defined %}
{% set thinking = false %}
{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{%- if ns.is_first_sp %}
{% set ns.system_prompt = ns.system_prompt + message['content'] %}
{% set ns.is_first_sp = false %}
{%- else %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor %}
{% if tools is defined and tools is not none %}
{% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
{% for tool in tools %}
{% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
{% endfor %}
{% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<tool▁calls▁begin><tool▁call▁begin>tool_call_name<tool▁sep>tool_call_arguments<tool▁call▁end>{{additional_tool_calls}}<tool▁calls▁end>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
{{ bos_token }}{{ ns.system_prompt }}
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{%- set ns.is_tool = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_last_user = true -%}
{{'<User>' + message['content']}}
{%- endif %}
{#- Assistant turn that carries tool calls: open the assistant turn with a closed think block
    when it directly follows a user turn, then write the calls back to back. -#}
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- if ns.is_last_user %}
{{'<Assistant></think>'}}
{%- endif %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if message['content'] is none %}
{{'<tool▁calls▁begin><tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments']|tojson + '<tool▁call▁end>'}}
{%- else %}
{{message['content'] + '<tool▁calls▁begin><tool▁call▁begin>' + tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments']|tojson + '<tool▁call▁end>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{'<tool▁call▁begin>'+ tool['function']['name'] + '<tool▁sep>' + tool['function']['arguments']|tojson + '<tool▁call▁end>'}}
{%- endif %}
{%- endfor %}
{{'<tool▁calls▁end><end▁of▁sentence>'}}
{%- endif %}
{#- Assistant turn without tool calls. After a user turn the assistant marker is written,
    followed by <think> only for a prefix message while thinking is on, otherwise </think>.
    Outside a tool section, everything up to and including the first </think> in the content
    is dropped. -#}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
{%- if ns.is_last_user %}
{{'<Assistant>'}}
{%- if message['prefix'] is defined and message['prefix'] and thinking %}
{{'<think>'}}
{%- else %}
{{'</think>'}}
{%- endif %}
{%- endif %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{message['content'] + '<end▁of▁sentence>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{%- set content = message['content'] -%}
{%- if '</think>' in content %}
{%- set content = content.split('</think>', 1)[1] -%}
{%- endif %}
{{content + '<end▁of▁sentence>'}}
{%- endif %}
{%- endif %}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{{'<tool▁output▁begin>' + message['content'] + '<tool▁output▁end>'}}
{%- endif %}
{%- endfor -%}
{#- Generation prompt: only written when the conversation ends with a user turn. -#}
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
{{'<Assistant>'}}
{%- if not thinking %}
{{'</think>'}}
{%- else %}
{{'<think>'}}
{%- endif %}
{% endif %}
@@ -0,0 +1,54 @@
{#- Turn-based template with <start_function_call> tool calls.
    ns.has_tools is true when a non-empty tools list is supplied.
    developer/system messages are rendered as a user turn; when tools are available, each
    tool of type 'function' is listed there with name, description (default text when
    missing) and JSON parameters.
    Assistant messages with tool calls are rendered as a model turn containing one
    call:NAME{key:<escape>value<escape>,...} entry per call; string arguments are first parsed
    with the fromjson filter. Tool messages are rendered as a user turn prefixed with
    "Function result for NAME:" (NAME defaults to 'function').
    NOTE(review): ns.developer_content is initialised but not referenced again in this
    template.
    NOTE(review): fromjson is not referenced elsewhere in this template; presumably the
    rendering environment registers it - confirm. -#}
{%- set ns = namespace(developer_content='', has_tools=false) -%}
{%- if tools is defined and tools | length > 0 -%}
{%- set ns.has_tools = true -%}
{%- endif -%}
{%- for message in messages -%}
{%- if message.role == 'developer' or message.role == 'system' -%}
<start_of_turn>user
{{ message.content }}
{%- if ns.has_tools %}
Available functions:
{%- for tool in tools %}
{%- if tool.type == 'function' %}
Function: {{ tool.function.name }}
Description: {{ tool.function.description | default('No description provided') }}
Parameters: {{ tool.function.parameters | tojson }}
{%- endif %}
{%- endfor %}
{%- endif %}
<end_of_turn>
{%- elif message.role == 'user' -%}
<start_of_turn>user
{{ message.content }}<end_of_turn>
{%- elif message.role == 'assistant' -%}
{%- if message.tool_calls is defined and message.tool_calls | length > 0 -%}
<start_of_turn>model
{%- for tool_call in message.tool_calls %}
<start_function_call>call:{{ tool_call.function.name }}{
{%- set args = tool_call.function.arguments -%}
{#- Arguments may arrive as a JSON string; turn them into a mapping before iterating. -#}
{%- if args is string -%}
{%- set args = args | fromjson -%}
{%- endif -%}
{%- for key, value in args.items() -%}
{{ key }}:<escape>{{ value }}<escape>{% if not loop.last %},{% endif %}
{%- endfor -%}
}<end_function_call>
{%- endfor %}
<end_of_turn>
{%- else -%}
<start_of_turn>model
{{ message.content }}<end_of_turn>
{%- endif -%}
{%- elif message.role == 'tool' -%}
<start_of_turn>user
Function result for {{ message.name | default('function') }}: {{ message.content }}<end_of_turn>
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<start_of_turn>model
{%- endif -%}
@@ -0,0 +1,123 @@
{#- Turn-based template that renders tool calls as a python-style list:
    [name(key=value, ...), other(...)].
    Inputs: messages, optional tools, bos_token, add_generation_prompt.
    A leading system message is not rendered as its own turn; its text is prepended to the
    first rendered turn, followed by the tool instructions when tools are supplied.
    Roles are mapped assistant -> model and tool -> user; tool results are wrapped in
    <tool_response> tags. Content may be a string or a list of text/image items. -#}
{#- Begin-of-sequence token to start the model prompt -#}
{{ bos_token }}
{#- Extracts the system message. Gemma does not support system messages so it will be prepended to first user message. -#}
{%- if messages[0]['role'] == 'system' -%}
{%- if messages[0]['content'] is string -%}
{%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
{%- else -%}
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
{%- endif -%}
{%- set loop_messages = messages[1:] -%}
{%- else -%}
{%- set first_user_prefix = "" -%}
{%- set loop_messages = messages -%}
{%- endif -%}
{#- Set tools to none if not defined for this ChatCompletion request (helps avoid errors later) -#}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{#- Validate alternating user/assistant messages (excluding 'tool' messages and ones with tool_calls) -#}
{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | selectattr("tool_calls", "undefined") -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
{%- endif -%}
{%- endfor -%}
{#- Main loop over all messages in the conversation history -#}
{%- for message in loop_messages -%}
{#- Normalize roles for model prompt formatting -#}
{%- if (message['role'] == 'assistant') -%}
{%- set role = "model" -%}
{%- elif (message['role'] == 'tool') -%}
{%- set role = "user" -%}
{%- else -%}
{%- set role = message['role'] -%}
{%- endif -%}
{#- Mark the start of a message block with the appropriate role -#}
{{ '<start_of_turn>' + role + '\n' -}}
{#- Insert system message content (if present) at the beginning of the first message. -#}
{%- if loop.first -%}
{{ first_user_prefix }}
{#- Append system message with tool information if using tools in message request. -#}
{%- if tools is not none -%}
{{- "Tools (functions) are available. If you decide to invoke one or more of the tools, you must respond with a python list of the function calls.\n" -}}
{{- "Example Format: [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] \n" -}}
{{- "Do not use variables. DO NOT USE MARKDOWN SYNTAX. You SHOULD NOT include any other text in the response if you call a function. If none of the functions can be used, point it out. If you lack the parameters required by the function, also point it out.\n" -}}
{{- "Here is a list of functions in JSON format that you can invoke.\n" -}}
{{- tools | tojson(indent=4) -}}
{{- "\n\n" -}}
{%- endif -%}
{%- endif -%}
{#- Format model tool calls (turns where model indicates they want to call a tool) -#}
{%- if 'tool_calls' in message -%}
{#- Opening bracket for tool call list. -#}
{{- '[' -}}
{#- For each tool call -#}
{%- for tool_call in message.tool_calls -%}
{#- Get tool call function. -#}
{%- if tool_call.function is defined -%}
{%- set tool_call = tool_call.function -%}
{%- endif -%}
{#- Function name & opening parenthesis. -#}
{{- tool_call.name + '(' -}}
{#-- Handle arguments as list (positional) or dict (named) --#}
{#-- Named arguments (dict) --#}
{%- if tool_call.arguments is iterable and tool_call.arguments is mapping -%}
{#- Separate named arguments with ", ". loop.first is used because a variable assigned with
    `set` inside the loop body is scoped to that iteration and cannot act as a "first" flag. -#}
{%- for key, val in tool_call.arguments.items() -%}
{%- if not loop.first %}, {% endif -%}
{{ key }}={{ val | tojson }}
{%- endfor -%}
{#-- Positional arguments (list) --#}
{%- elif tool_call.arguments is iterable -%}
{{- tool_call.arguments | map('tojson') | join(', ') -}}
{#-- Fallback: single positional value --#}
{%- else -%}
{{- tool_call.arguments | tojson -}}
{%- endif -%}
{#-- Closing parenthesis. --#}
{{- ')' -}}
{#-- If more than one tool call, place comma and move to formatting next tool call --#}
{%- if not loop.last -%}, {% endif -%}
{%- endfor -%}
{#- Closing bracket for tool call list. -#}
{{- ']' -}}
{%- endif -%}
{#- Tool response start tag (for messages from a tool) -#}
{%- if (message['role'] == 'tool') -%}
{{ '<tool_response>\n' -}}
{%- endif -%}
{#- Render the message content: handle plain string or multimodal content like image/text -#}
{%- if message['content'] is string -%}
{{ message['content'] | trim }}
{%- elif message['content'] is iterable -%}
{%- for item in message['content'] -%}
{%- if item['type'] == 'image' -%}
{{ '<start_of_image>' }}
{%- elif item['type'] == 'text' -%}
{{ item['text'] | trim }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ raise_exception("Invalid content type") }}
{%- endif -%}
{#- Tool response end tag -#}
{%- if (message['role'] == 'tool') -%}
{{ '</tool_response>' -}}
{%- endif -%}
{#- Mark end of a single turn -#}
{{ '<end_of_turn>\n' }}
{%- endfor -%}
{#- If generation is to be triggered, add model prompt prefix -#}
{%- if add_generation_prompt -%}
{{'<start_of_turn>model\n'}}
{%- endif -%}
@@ -0,0 +1,331 @@
{#- format_parameters(properties, required)
    Renders the entries of a schema-style `properties` mapping as comma-separated
    `name:{...}` items, visited in key order (dictsort). Entries whose key is one of
    standard_keys are skipped.
    Per entry, in order: description, nullable, then type-specific fields (enum for STRING,
    nested properties/required for OBJECT, items for ARRAY), and finally
    type:<|"|>TYPE<|"|> which also closes the entry's brace.
    add_comma tracks whether a separator is needed before the next field of the current
    entry; ns.found_first does the same between entries.
    NOTE(review): the `required` argument is accepted but never read in this body.
    NOTE(review): the OBJECT and ARRAY branches write their leading comma unconditionally,
    unlike the other fields - confirm this is intended when no field precedes them. -#}
{%- macro format_parameters(properties, required) -%}
{%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
{%- set ns = namespace(found_first=false) -%}
{%- for key, value in properties | dictsort -%}
{%- set add_comma = false -%}
{%- if key not in standard_keys -%}
{%- if ns.found_first %},{% endif -%}
{%- set ns.found_first = true -%}
{{ key }}:{
{%- if value['description'] -%}
description:<|"|>{{ value['description'] }}<|"|>
{%- set add_comma = true -%}
{%- endif -%}
{%- if value['nullable'] %}
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
nullable:true
{%- endif -%}
{%- if value['type'] | upper == 'STRING' -%}
{%- if value['enum'] -%}
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
enum:{{ format_argument(value['enum']) }}
{%- endif -%}
{%- elif value['type'] | upper == 'OBJECT' -%}
,properties:{
{%- if value['properties'] is defined and value['properties'] is mapping -%}
{{- format_parameters(value['properties'], value['required'] | default([])) -}}
{%- elif value is mapping -%}
{{- format_parameters(value, value['required'] | default([])) -}}
{%- endif -%}
}
{%- if value['required'] -%}
,required:[
{%- for item in value['required'] | default([]) -%}
<|"|>{{- item -}}<|"|>
{%- if not loop.last %},{% endif -%}
{%- endfor -%}
]
{%- endif -%}
{%- elif value['type'] | upper == 'ARRAY' -%}
{%- if value['items'] is mapping and value['items'] -%}
,items:{
{%- set ns_items = namespace(found_first=false) -%}
{#- Walk the item schema key by key; properties, required and type get dedicated
    rendering, every other key is passed to format_argument. None values are skipped. -#}
{%- for item_key, item_value in value['items'] | dictsort -%}
{%- if item_value is not none -%}
{%- if ns_items.found_first %},{% endif -%}
{%- set ns_items.found_first = true -%}
{%- if item_key == 'properties' -%}
properties:{
{%- if item_value is mapping -%}
{{- format_parameters(item_value, value['items']['required'] | default([])) -}}
{%- endif -%}
}
{%- elif item_key == 'required' -%}
required:[
{%- for req_item in item_value -%}
<|"|>{{- req_item -}}<|"|>
{%- if not loop.last %},{% endif -%}
{%- endfor -%}
]
{%- elif item_key == 'type' -%}
{%- if item_value is string -%}
type:{{ format_argument(item_value | upper) }}
{%- else -%}
type:{{ format_argument(item_value | map('upper') | list) }}
{%- endif -%}
{%- else -%}
{{ item_key }}:{{ format_argument(item_value) }}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
}
{%- endif -%}
{%- endif -%}
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
type:<|"|>{{ value['type'] | upper }}<|"|>}
{%- endif -%}
{%- endfor -%}
{%- endmacro -%}
{#- format_function_declaration(tool_data)
    Renders one tool as declaration:NAME{description:...,parameters:{...},response:{...}}.
    Reads tool_data['function'] for name, description, parameters and (optionally) response.
    The parameters section is written only when parameters is truthy, and contains
    properties, required and type in that order, each only when present.
    NOTE(review): the brace that closes the parameters section is part of the type line, and
    the brace that closes the response section is part of the OBJECT type line, so both look
    unbalanced when those conditions are not met - confirm inputs always satisfy them. -#}
{%- macro format_function_declaration(tool_data) -%}
declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
{%- set params = tool_data['function']['parameters'] -%}
{%- if params -%}
,parameters:{
{%- if params['properties'] -%}
properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
{%- endif -%}
{%- if params['required'] -%}
required:[
{%- for item in params['required'] -%}
<|"|>{{- item -}}<|"|>
{{- ',' if not loop.last -}}
{%- endfor -%}
],
{%- endif -%}
{%- if params['type'] -%}
type:<|"|>{{- params['type'] | upper -}}<|"|>}
{%- endif -%}
{%- endif -%}
{%- if 'response' in tool_data['function'] -%}
{%- set response_declaration = tool_data['function']['response'] -%}
,response:{
{%- if response_declaration['description'] -%}
description:<|"|>{{- response_declaration['description'] -}}<|"|>,
{%- endif -%}
{%- if response_declaration['type'] | upper == 'OBJECT' -%}
type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
{%- endif -%}
{%- endif -%}
}
{%- endmacro -%}
{#- format_argument(argument, escape_keys=True)
    Recursively serialises a value:
      string   -> wrapped in <|"|> ... <|"|>
      boolean  -> true / false
      mapping  -> {key:value,...} in key order (dictsort); keys are wrapped in <|"|> only
                  when escape_keys is true
      sequence -> [item,item,...]
      other    -> emitted as-is
    The string test comes first, so strings never reach the sequence branch.
    escape_keys is passed down unchanged to nested values. -#}
{%- macro format_argument(argument, escape_keys=True) -%}
{%- if argument is string -%}
{{- '<|"|>' + argument + '<|"|>' -}}
{%- elif argument is boolean -%}
{{- 'true' if argument else 'false' -}}
{%- elif argument is mapping -%}
{{- '{' -}}
{%- set ns = namespace(found_first=false) -%}
{%- for key, value in argument | dictsort -%}
{%- if ns.found_first %},{% endif -%}
{%- set ns.found_first = true -%}
{%- if escape_keys -%}
{{- '<|"|>' + key + '<|"|>' -}}
{%- else -%}
{{- key -}}
{%- endif -%}
:{{- format_argument(value, escape_keys=escape_keys) -}}
{%- endfor -%}
{{- '}' -}}
{%- elif argument is sequence -%}
{{- '[' -}}
{%- for item in argument -%}
{{- format_argument(item, escape_keys=escape_keys) -}}
{%- if not loop.last %},{% endif -%}
{%- endfor -%}
{{- ']' -}}
{%- else -%}
{{- argument -}}
{%- endif -%}
{%- endmacro -%}
{#- strip_thinking(text)
    Removes every span that starts at a '<|channel>' marker and ends at the next '<channel|>'
    marker, then trims the result.
    The text is split on the closing marker; in each piece, anything from the opening marker
    onwards is dropped, and the kept pieces are concatenated. -#}
{%- macro strip_thinking(text) -%}
{%- set ns = namespace(result='') -%}
{%- for part in text.split('<channel|>') -%}
{%- if '<|channel>' in part -%}
{%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
{%- else -%}
{%- set ns.result = ns.result + part -%}
{%- endif -%}
{%- endfor -%}
{{- ns.result | trim -}}
{%- endmacro -%}
{#- format_tool_response_block(tool_name, response)
    Wraps one tool result in <|tool_response> ... <tool_response|>.
    A mapping response is rendered as response:NAME{key:value,...} in key order (dictsort);
    any other response is rendered as response:NAME{value:...}.
    Values go through format_argument with escape_keys=False. -#}
{%- macro format_tool_response_block(tool_name, response) -%}
{{- '<|tool_response>' -}}
{%- if response is mapping -%}
{{- 'response:' + tool_name + '{' -}}
{%- for key, value in response | dictsort -%}
{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
{%- if not loop.last %},{% endif -%}
{%- endfor -%}
{{- '}' -}}
{%- else -%}
{{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
{%- endif -%}
{{- '<tool_response|>' -}}
{%- endmacro -%}
{#- Main body of the <|turn> template.
    Inputs: messages, optional tools, optional enable_thinking, bos_token,
    add_generation_prompt.
    1. A system turn is written when thinking is enabled, tools are supplied, or the first
       message is a system/developer message; it holds the <|think|> marker, the
       system/developer text and one <|tool>...<tool|> declaration per tool.
    2. Messages with role 'tool' are never rendered on their own iteration; they are rendered
       from the assistant message that carries the matching tool_calls (see the range scan
       below), or from a legacy tool_responses list on the assistant message.
    3. ns.prev_message_type records what was written last so the generation prompt can decide
       whether a new model turn has to be opened. -#}
{%- set ns = namespace(prev_message_type=None) -%}
{%- set loop_messages = messages -%}
{{ bos_token }}
{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
{{- '<|turn>system\n' -}}
{%- if enable_thinking is defined and enable_thinking -%}
{{- '<|think|>' -}}
{%- set ns.prev_message_type = 'think' -%}
{%- endif -%}
{%- if messages[0]['role'] in ['system', 'developer'] -%}
{{- messages[0]['content'] | trim -}}
{%- set loop_messages = messages[1:] -%}
{%- endif -%}
{%- if tools -%}
{%- for tool in tools %}
{{- '<|tool>' -}}
{{- format_function_declaration(tool) | trim -}}
{{- '<tool|>' -}}
{%- endfor %}
{%- set ns.prev_message_type = 'tool' -%}
{%- endif -%}
{{- '<turn|>\n' -}}
{%- endif %}
{#- Index of the last user message in loop_messages (-1 when there is none); reasoning is
    only replayed for messages after that index. -#}
{%- set ns_turn = namespace(last_user_idx=-1) -%}
{%- for i in range(loop_messages | length) -%}
{%- if loop_messages[i]['role'] == 'user' -%}
{%- set ns_turn.last_user_idx = i -%}
{%- endif -%}
{%- endfor -%}
{%- for message in loop_messages -%}
{%- if message['role'] != 'tool' -%}
{%- set ns.prev_message_type = None -%}
{%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
{#- OpenAI may emit multiple assistant messages in one tool loop (user → asst → tool → asst → tool).
Only the first of those should open <|turn>model; later ones continue the same model turn. -#}
{%- set prev_nt = namespace(role=None, found=false) -%}
{%- if loop.index0 > 0 -%}
{#- Scan backwards for the closest earlier message that is not a tool message. -#}
{%- for j in range(loop.index0 - 1, -1, -1) -%}
{%- if not prev_nt.found -%}
{%- if loop_messages[j]['role'] != 'tool' -%}
{%- set prev_nt.role = loop_messages[j]['role'] -%}
{%- set prev_nt.found = true -%}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
{%- if not continue_same_model_turn -%}
{{- '<|turn>' + role + '\n' }}
{%- endif -%}
{#- Reasoning is written only for tool-calling messages that come after the last user turn. -#}
{%- if message.get('reasoning') and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
{{- '<|channel>thought\n' + message['reasoning'] + '\n<channel|>'}}
{%- endif -%}
{%- if message['tool_calls'] -%}
{%- for tool_call in message['tool_calls'] -%}
{%- set function = tool_call['function'] -%}
{{- '<|tool_call>call:' + function['name'] + '{' -}}
{%- if function['arguments'] is mapping -%}
{%- set ns_args = namespace(found_first=false) -%}
{%- for key, value in function['arguments'] | dictsort -%}
{%- if ns_args.found_first %},{% endif -%}
{%- set ns_args.found_first = true -%}
{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
{%- endfor -%}
{%- elif function['arguments'] is string -%}
{{- function['arguments'] -}}
{%- endif -%}
{{- '}<tool_call|>' -}}
{%- endfor -%}
{%- set ns.prev_message_type = 'tool_call' -%}
{%- endif -%}
{%- set ns_tr_out = namespace(flag=false) -%}
{%- if message.get('tool_responses') -%}
{#- Legacy: tool_responses embedded on the assistant message -#}
{%- for tool_response in message['tool_responses'] -%}
{{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
{%- set ns_tr_out.flag = true -%}
{%- set ns.prev_message_type = 'tool_response' -%}
{%- endfor -%}
{%- elif message.get('tool_calls') -%}
{#- OpenAI Chat Completions: consecutive following messages with role "tool" (no break/continue; range scan) -#}
{%- set ns_tool_scan = namespace(stopped=false) -%}
{%- for k in range(loop.index0 + 1, loop_messages | length) -%}
{%- if ns_tool_scan.stopped -%}
{%- elif loop_messages[k]['role'] != 'tool' -%}
{%- set ns_tool_scan.stopped = true -%}
{%- else -%}
{%- set follow = loop_messages[k] -%}
{#- Tool name: the message's own name (or 'unknown'), overridden by the name of the tool
    call whose id equals this message's tool_call_id. -#}
{%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
{%- for tc in message['tool_calls'] -%}
{%- if tc.get('id') == follow.get('tool_call_id') -%}
{%- set ns_tname.name = tc['function']['name'] -%}
{%- endif -%}
{%- endfor -%}
{%- set tool_body = follow.get('content') -%}
{%- if tool_body is string -%}
{{- format_tool_response_block(ns_tname.name, tool_body) -}}
{%- elif tool_body is sequence and tool_body is not string -%}
{#- List content: concatenate the text parts and ignore every other part type. -#}
{%- set ns_txt = namespace(s='') -%}
{%- for part in tool_body -%}
{%- if part.get('type') == 'text' -%}
{%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
{%- endif -%}
{%- endfor -%}
{{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
{%- else -%}
{{- format_tool_response_block(ns_tname.name, tool_body) -}}
{%- endif -%}
{%- set ns_tr_out.flag = true -%}
{%- set ns.prev_message_type = 'tool_response' -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- if message['content'] is string -%}
{%- if role == 'model' -%}
{{- strip_thinking(message['content']) -}}
{%- else -%}
{{- message['content'] | trim -}}
{%- endif -%}
{%- elif message['content'] is sequence -%}
{%- for item in message['content'] -%}
{%- if item['type'] == 'text' -%}
{%- if role == 'model' -%}
{{- strip_thinking(item['text']) -}}
{%- else -%}
{{- item['text'] | trim -}}
{%- endif -%}
{%- elif item['type'] == 'image' -%}
{{- '\n\n<|image|>\n\n' -}}
{%- set ns.prev_message_type = 'image' -%}
{%- elif item['type'] == 'audio' -%}
{{- '<|audio|>' -}}
{%- set ns.prev_message_type = 'audio' -%}
{%- elif item['type'] == 'video' -%}
{{- '\n\n<|video|>\n\n' -}}
{%- set ns.prev_message_type = 'video' -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{#- The turn is left open when tool responses were written and the message has no content. -#}
{%- if not (ns_tr_out.flag and not message.get('content')) -%}
{{- '<turn|>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{#- Generation prompt: open a model turn unless the last thing written was a tool response,
    and add an empty thought channel when thinking is not enabled. -#}
{%- if add_generation_prompt -%}
{%- if ns.prev_message_type != 'tool_response' -%}
{{- '<|turn>model\n' -}}
{%- endif -%}
{%- if not enable_thinking | default(false) -%}
{{- '<|channel>thought\n<channel|>' -}}
{%- endif -%}
{%- endif -%}
@@ -0,0 +1,54 @@
{#- Chat template that renders an optional tool-usage preamble followed by the
    conversation as numbered "[Round N]" question / answer turns. #}
{#- A namespace is used so the round counter can be advanced from inside the loop. #}
{%- set counter = namespace(index=0) -%}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{#- Remove a leading system message from the list that the turn loop iterates. #}
{#- NOTE(review): system_message is assigned but never rendered below; confirm this is intended. #}
{%- if messages and messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content']|trim %}
{%- set messages = messages[1:] %}
{%- else %}
{%- set system_message = "You are a helpful assistant." %}
{%- endif %}
{#- When tools are supplied, print the calling-format instructions and then each tool as indented JSON. #}
{%- if tools is not none %}
{%- set tool_instruction %}
You have access to the following tools. When you need to call a tool, you MUST use the following format:
<tool_call>function_name
<arg_key>parameter_name</arg_key>
<arg_value>parameter_value</arg_value>
</tool_call>
Important rules:
- Always wrap tool calls with <tool_call>...</tool_call> tags
- Put the function name on the first line after <tool_call>
- Use <arg_key> and <arg_value> tags for each parameter
- If a parameter value is a string, keep it as-is. If it's a number or boolean, convert it appropriately
- You can make multiple tool calls if needed
- If no tool is suitable, respond with regular text
Available tools:
{% endset %}
{{- tool_instruction + "\n\n" }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{%- endif %}
{#- User turns open a new numbered round; assistant turns print the answer for that round. #}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- '[Round ' + counter.index|string + ']\n问:' + message['content'] -}}
{%- set counter.index = counter.index + 1 -%}
{%- endif -%}
{%- if message['role'] == 'assistant' -%}
{{- '\n答:' + message['content'] -}}
{#- A trailing newline is skipped only for a final assistant turn rendered without a generation prompt. #}
{%- if (loop.last and add_generation_prompt) or not loop.last -%}
{{- '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{#- Open the answer slot unless the conversation already ends with an assistant turn.
    The length check keeps an empty message list (for example, only a system message
    was supplied) from being indexed, which would otherwise abort rendering. #}
{%- if add_generation_prompt and (messages | length == 0 or messages[-1]['role'] != 'assistant') -%}
{{- '\n答:' -}}
{%- endif -%}
@@ -0,0 +1,36 @@
{#- Granite-style chat template: an optional available_tools section followed by one
    <|start_of_role|>ROLE<|end_of_role|> ... <|end_of_text|> segment per message. #}
{%- if tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>
' }}
{#- Each tool is printed as indented JSON, with a newline between consecutive tools. #}
{%- for tool in tools %}
{{- tool | tojson(indent=4) }}
{%- if not loop.last %}
{{- '
' }}
{%- endif %}
{%- endfor %}
{{- '<|end_of_text|>
' }}
{%- endif %}
{#- Messages whose role matches none of the branches below produce no output. #}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{%- elif message['role'] == 'user' %}
{{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{#- A turn carrying tool_calls is rendered as a <|tool_call|> JSON list of the calls'
    function objects; the message content is not printed in this branch. #}
{%- elif message['role'] == 'assistant_tool_call' or (message['role'] == 'assistant' and message.tool_calls is defined) %}
{{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message.tool_calls|map(attribute='function')|list|tojson(indent=4) + '<|end_of_text|>
' }}
{%- elif message['role'] == 'assistant' %}
{{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{#- Both the tool_response and tool roles are rendered under the tool_response role name. #}
{%- elif message['role'] == 'tool_response' or message['role'] == 'tool' %}
{{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>
' }}
{%- endif %}
{#- The generation prompt is emitted from inside the loop, after the final message,
    so an empty messages list yields no generation prompt. #}
{%- if loop.last and add_generation_prompt %}
{{- '<|start_of_role|>assistant<|end_of_role|>' }}
{%- endif %}
{%- endfor %}
@@ -0,0 +1,130 @@
{%- macro json_to_python_type(json_spec) %}
{#- Render a JSON-schema fragment as a Python-style type hint string.
    json_spec: mapping with a "type" entry (a string or a list of strings) and, depending
    on the type, "items" (arrays) or "additionalProperties" (objects).
    Output: str/float/int/bool, list[...], dict or dict[str, ...], Union[...], or Any. #}
{%- set basic_type_map = {
"string": "str",
"number": "float",
"integer": "int",
"boolean": "bool"
} %}
{%- if basic_type_map[json_spec.type] is defined %}
{{- basic_type_map[json_spec.type] }}
{%- elif json_spec.type == "array" %}
{#- Read the element schema by key. The filter form `json_spec|items` would yield the
    mapping's key/value pairs instead of the "items" schema. #}
{%- if 'items' in json_spec %}
{{- "list[" + json_to_python_type(json_spec['items']) + "]" }}
{%- else %}
{{- "list[Any]" }}
{%- endif %}
{%- elif json_spec.type == "object" %}
{%- if json_spec.additionalProperties is defined %}
{{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
{%- else %}
{{- "dict" }}
{%- endif %}
{#- Only a real list of type names is a union. Strings are iterable too, so an unmapped
    type name such as "null" must be excluded or it would be expanded character by
    character and recurse without terminating. #}
{%- elif json_spec.type is iterable and json_spec.type is not string %}
{{- "Union[" }}
{%- for t in json_spec.type %}
{{- json_to_python_type({"type": t}) }}
{%- if not loop.last %}
{{- "," }}
{%- endif %}
{%- endfor %}
{{- "]" }}
{%- else %}
{{- "Any" }}
{%- endif %}
{%- endmacro %}
{#- Callers may pass full_function_description; when it is absent the short description form is used. #}
{%- if full_function_description is not defined %}
{%- set full_function_description = false %}
{%- endif %}
{%- macro full_description(tool) %}
{#- Render a tool as a Python-style signature followed by its description, an Args
    summary and an optional Returns summary, then a closing double quote.
    tool: mapping with name and description, and optionally parameters.properties
    and return. #}
{{- tool.name + '(' }}
{#- Signature parameters: "name: type" pairs separated by ", ". #}
{%- if tool.parameters is defined %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- param_name + ": " + json_to_python_type(param_fields) }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- ")" }}
{%- if tool.return is defined %}
{{- " -> " + json_to_python_type(tool.return) }}
{%- endif %}
{{- " - " + tool.description + "\n\n" }}
{#- Args section: the heading is printed once, on the first parameter.
    No separator is emitted between consecutive parameter entries. #}
{%- if tool.parameters is defined %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{%- if loop.first %}
{{- " Args:\n" }}
{%- endif %}
{{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
{%- endfor %}
{%- endif %}
{%- if tool.return is defined and tool.return.description is defined %}
{{- "\n Returns:\n " + tool.return.description }}
{%- endif %}
{#- Closes the quoted string that the caller opened before invoking this macro. #}
{{- '"' }}
{%- endmacro %}
{%- macro simple_description(tool) %}
{#- Render only the tool's own description, followed by the closing double quote.
    The caller opens the quoted "description" value before invoking a description macro
    and full_description closes it itself, so this macro must close it as well;
    otherwise the emitted tool entry has an unterminated string. #}
{{- tool.description }}
{{- '"' }}
{%- endmacro %}
{%- macro function_description(tool) %}
{#- Dispatch to the long or the short description form according to the
    full_function_description switch. #}
{{- full_description(tool) if full_function_description else simple_description(tool) }}
{%- endmacro %}
{#- Main prompt body.
    1. A leading system message becomes the SYSTEM prompt; otherwise a built-in prompt is used.
    2. Available tools are listed one per line under the function-call-library marker,
       unwrapping an outer "function" wrapper when present.
    3. The remaining messages are rendered as USER / ASSISTANT text, function calls and
       function responses; any other role raises an exception.
    The message loop iterates loop_messages (the conversation without the leading system
    message). Iterating the full list would send the system message into the
    raise_exception branch and abort rendering whenever a system prompt is supplied. #}
{%- if messages[0]["role"] == "system" %}
{%- set sys_prompt = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{% set sys_prompt = 'You are a helpful assistant with access to the following function calls. Your task is to understand the given conversation with function calls and responses and generate natural language response as the ASSISTANT to continue the conversation. You may use the following function calls to understand how to respond to the user query.' %}
{%- endif %}
{{ 'SYSTEM: ' + sys_prompt }}
{% if tools is iterable and tools | length > 0 %}
<|function_call_library|>
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- '{"name": "' + tool.name + '", ' }}
{{- '"description": "' + function_description(tool) }}
{{- ', "parameters": ' }}
{%- if not tool.parameters is defined or tool.parameters.properties | length == 0 %}
{{- "{}" }}
{%- else %}
{{- tool.parameters|tojson }}
{%- endif %}
{{- "}" }}
{%- if not loop.last %}
{{- "\n" }}
{%- endif %}
{%- endfor %}
If none of the functions are relevant or the given question lacks the parameters required by the function, please output \"<function_call> {\"name\": \"no_function\", \"arguments\": {}}\".
{%- endif %}
{% for message in loop_messages %}
{% if message['role'] == 'user' %}
{{- '\nUSER: ' + message['content'] }}
{% elif message['role'] == 'assistant' and message.tool_calls is defined %}
{{- '\nASSISTANT:' }}
{% for tc in message.tool_calls %}
{{- '<function_call> ' + {'name': tc.function.name, 'arguments': tc.function.arguments}|tojson }}
{% endfor %}
{{- '<|endoftext|>' }}
{% elif message['role'] == 'assistant' %}
{{- '\nASSISTANT: ' + message['content'] + ' <|endoftext|>' }}
{% elif message['role'] == 'tool' %}
{{- '<function_response> ' + message['content'] }}
{%- else %}
{{- raise_exception("Unexpected combination of role and message content") }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{- '\nASSISTANT: ' }}
{% endif %}
{% endfor %}
@@ -0,0 +1,130 @@
{%- macro json_to_python_type(json_spec) %}
{#- Render a JSON-schema fragment as a Python-style type hint string.
    json_spec: mapping with a "type" entry (a string or a list of strings) and, depending
    on the type, "items" (arrays) or "additionalProperties" (objects).
    Output: str/float/int/bool, list[...], dict or dict[str, ...], Union[...], or Any. #}
{%- set basic_type_map = {
"string": "str",
"number": "float",
"integer": "int",
"boolean": "bool"
} %}
{%- if basic_type_map[json_spec.type] is defined %}
{{- basic_type_map[json_spec.type] }}
{%- elif json_spec.type == "array" %}
{#- Read the element schema by key. The filter form `json_spec|items` would yield the
    mapping's key/value pairs instead of the "items" schema. #}
{%- if 'items' in json_spec %}
{{- "list[" + json_to_python_type(json_spec['items']) + "]" }}
{%- else %}
{{- "list[Any]" }}
{%- endif %}
{%- elif json_spec.type == "object" %}
{%- if json_spec.additionalProperties is defined %}
{{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
{%- else %}
{{- "dict" }}
{%- endif %}
{#- Only a real list of type names is a union. Strings are iterable too, so an unmapped
    type name such as "null" must be excluded or it would be expanded character by
    character and recurse without terminating. #}
{%- elif json_spec.type is iterable and json_spec.type is not string %}
{{- "Union[" }}
{%- for t in json_spec.type %}
{{- json_to_python_type({"type": t}) }}
{%- if not loop.last %}
{{- "," }}
{%- endif %}
{%- endfor %}
{{- "]" }}
{%- else %}
{{- "Any" }}
{%- endif %}
{%- endmacro %}
{#- Hermes-style function-calling prompt: a system turn that lists the tools inside
    <tools> tags and explains the <tool_call> reply format, followed by the conversation
    rendered as <|im_start|>ROLE ... <|im_end|> turns. #}
{{- bos_token }}
{{- "<|im_start|>system\nYou are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> " }}
{%- if tools is iterable and tools | length > 0 %}
{#- Each tool is written as a function object whose description is a Python-style
    signature, the tool's own description and an Args / Returns summary. #}
{%- for tool in tools %}
{#- Accept both a bare function spec and one nested under a "function" key. #}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- '{"type": "function", "function": ' }}
{{- '{"name": "' + tool.name + '", ' }}
{{- '"description": "' + tool.name + '(' }}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- param_name + ": " + json_to_python_type(param_fields) }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{{- ")" }}
{%- if tool.return is defined %}
{{- " -> " + json_to_python_type(tool.return) }}
{%- endif %}
{{- " - " + tool.description + "\n\n" }}
{#- The Args heading is printed once, on the first parameter. #}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{%- if loop.first %}
{{- " Args:\n" }}
{%- endif %}
{{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
{%- endfor %}
{%- if tool.return is defined and tool.return.description is defined %}
{{- "\n Returns:\n " + tool.return.description }}
{%- endif %}
{{- '"' }}
{{- ', "parameters": ' }}
{#- A tool without properties gets an empty parameters object instead of its schema. #}
{%- if tool.parameters.properties | length == 0 %}
{{- "{}" }}
{%- else %}
{{- tool.parameters|tojson }}
{%- endif %}
{{- "}" }}
{%- if not loop.last %}
{{- "\n" }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- " </tools>" }}
{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}}
' }}
{{- "For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
" }}
{{- "<tool_call>
" }}
{{- '{"name": <function-name>, "arguments": <args-dict>}
' }}
{{- '</tool_call><|im_end|>' }}
{#- Conversation turns. #}
{%- for message in messages %}
{#- user, system and assistant messages without tool_calls are rendered verbatim. #}
{%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{#- An assistant turn with tool_calls prints one <tool_call> block per call; the message
    content is not printed in this branch. #}
{%- elif message.role == "assistant" and message.tool_calls is defined %}
{{- '<|im_start|>' + message.role }}
{%- for tool_call in message.tool_calls %}
{{- '\n<tool_call>\n' }}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '{' }}
{{- '"name": "' }}
{{- tool_call.name }}
{{- '"' }}
{%- if tool_call.arguments is defined %}
{{- ', ' }}
{{- '"arguments": ' }}
{{- tool_call.arguments|tojson }}
{%- endif %}
{{- '}' }}
{{- '\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{#- Consecutive tool messages share one tool turn: the header is written only when the
    previous message is not a tool message, and the terminator only when the next
    message is not a tool message or this is the last message. #}
{#- NOTE(review): the header condition requires loop.previtem to be truthy, so a tool
    message in first position appears to get no header; confirm that case cannot occur. #}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>tool\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{%- if not loop.last %}
{{- '\n</tool_response>\n' }}
{%- else %}
{{- '\n</tool_response>' }}
{%- endif %}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>' }}
{%- elif loop.last %}
{{- '<|im_end|>' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
@@ -0,0 +1,113 @@
{#- Hunyuan-style chat template. With tools, the first message is prefixed with a Chinese
    function-calling instruction block, the tool list as JSON and the current time; later
    messages are wrapped with role markers and delimiter tokens. Without tools, messages
    are joined using only the delimiter tokens. -#}
{% set loop_messages = messages %}
{% if tools %}
{#- Current time string: "YYYY-MM-DD HH:MM:SS" followed by the Chinese weekday name. -#}
{% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
{% set weekday_cn = weekday_map[strftime_now('%A')] %}
{% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
{% set datetime_str = datetime_str + ' ' + weekday_cn %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的。
如果没有一个函数可以使用,请直接使用自然语言回复用户,以助手:开头。
如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息,以助手:开头。
如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户,以助手:开头。
你应该只在工具调用部分返回函数调用。如果你决定调用任何函数,你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表,格式为JSON。
' %}
{% set content_tmp = content_tmp + '
' + tools | tojson + '
' %}
{% if message['role'] == 'system' %}
{% set content_tmp = content_tmp + '
额外要求:
' + content + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。如果额外要求里有格式要求,请忽略,以此处为准。
否则,请参考开头说的三种情况,以助手:开头进行回复。
如果额外要求里有时间信息,就以额外要求里的时间为准,否则,参考当前时间:' + datetime_str %}
{% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
{% elif message['role'] == 'user' %}
{% set content_tmp = content_tmp + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。
否则,请参考开头说的三种情况,以助手:开头进行回复。
当前时间:' + datetime_str %}
{% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
{% set content = content_tmp + '用户:' + content + '<|extra_0|>' %}
{% endif %}
{% else %}
{% if message['role'] == 'user' %}
{% set content = '用户:' + content + '<|extra_0|>' %}
{% elif message['role'] == 'assistant' %}
{% if 'tool_calls' in message %}
{#- Build the JSON list of calls by hand; arguments that are not already strings are serialized. -#}
{% set tool_calls = message['tool_calls'] %}
{% set ns = namespace(tool_calls="[") %}
{% for tool_call in tool_calls %}
{% set function = tool_call['function'] %}
{% set name = function['name'] %}
{% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
{% set arguments = function['arguments'] %}
{% if arguments is not string %}
{% set arguments = arguments | tojson %}
{% endif %}
{% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
{% if not loop.last %}
{% set ns.tool_calls = ns.tool_calls + ', '%}
{% endif %}
{% endfor %}
{% set ns.tool_calls = ns.tool_calls + ']' %}
{% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
{% else %}
{% set content = '助手:' + content %}
{% endif %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{#- Non-string tool results are serialized to JSON before being wrapped. This must be
    a real set statement: without the block delimiters it is emitted as literal text and
    the string concatenation below fails for non-string content. -#}
{% if content is not string %}
{% set content = content | tojson %}
{% endif %}
{% set content = '<tool_response>' + content + '</tool_response>' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{% endif %}
{{- content -}}
{% endfor %}
{% else %}
{#- No tools: has_head records whether the first message had non-empty content. -#}
{% set context = {'has_head': true} %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% if content == '' %}
{% set _ = context.update({'has_head': false}) %}
{% elif message['role'] == 'system' %}
{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
{% endif %}
{% endif %}
{% if message['role'] == 'user' %}
{#- NOTE(review): when loop.index0 == 1 and has_head is false, both statements below
    prepend the start-of-text token; confirm that the doubled token is intended. -#}
{% if loop.index0 == 1 and not context.has_head %}
{% set content = '<|startoftext|>' + content %}
{% endif %}
{% if loop.index0 == 1 and context.has_head %}
{% set content = content + '<|extra_0|>' %}
{% else %}
{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
{% endif %}
{% elif message['role'] == 'assistant' %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{{- content -}}
{% endfor %}
{% endif %}
{#- When thinking is explicitly disabled, an empty think block is appended. #}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n' }}
{%- endif %}
@@ -0,0 +1,60 @@
{#- InternLM2-style chat template: an optional system turn, an optional plugin turn that
    lists the tools as a JSON array, then the conversation as
    <|im_start|>ROLE ... <|im_end|> turns with tool calls wrapped in action markers. #}
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{{- bos_token }}
{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message + "<|im_end|>\n" }}
{%- endif %}
{%- if tools is not none %}
{{- "<|im_start|>system name=<|plugin|>\n[" }}
{%- for tool in tools %}
{{- tool.function|tojson }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{#- The closing bracket is written after the loop so that an empty tool list still
    produces a balanced "[]" instead of a dangling "[". #}
{{- "]" }}
{{- "<|im_end|>\n" }}
{%- endif %}
{%- for message in loop_messages %}
{%- if message["role"] == "user" %}
{{- "<|im_start|>user\n" + message["content"] + "<|im_end|>\n"}}
{#- Any non-user message carrying tool_calls is rendered as an assistant turn: optional
    content first, then one action block per call. #}
{%- elif message.tool_calls is defined and message.tool_calls is not none %}
{%- set content = message["content"] if message["content"] else "" %}
{{- "<|im_start|>assistant\n" + content }}
{%- for tool_call in message.tool_calls %}
{%- set function=tool_call.function %}
{{- "<|action_start|><|plugin|>\n" }}
{{- '{"name": "' + function.name + '", '}}
{{- '"arguments": ' + function.arguments|tojson + '}' }}
{{- "<|action_end|>" }}
{%- endfor %}
{{- "<|im_end|>\n" }}
{%- elif message["role"] == "assistant" %}
{{- "<|im_start|>assistant\n" + message["content"] + "<|im_end|>\n"}}
{#- Tool output is rendered as an environment turn; a nested content.content value is
    used when present, otherwise the content itself. #}
{%- elif message["role"] == "tool_results" or message["role"] == "tool" or message["role"] == "function" %}
{%- if message.content is defined and message.content.content is defined %}
{%- set content = message.content.content %}
{%- else %}
{%- set content = message.content %}
{%- endif %}
{{- "<|im_start|>environment name=<|plugin|>\n" + content|string + "<|im_end|>\n" }}
{%- else %}
{{- raise_exception("Only user and assistant and tool_results and tool and function roles are supported, with the exception of an initial optional system message!") }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
@@ -0,0 +1,120 @@
{#- Llama 3.1 style chat template with JSON tool calling: a system header with
    environment and date lines, an optional tool listing (in the system turn or in the
    first user turn), then the conversation as header-delimited turns. #}
{{- bos_token }}
{#- custom_tools, when provided, replaces tools. #}
{%- if custom_tools is defined %}
{%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
{#- Llama 3.1 doesn't pass all tests if the tools are in the system prompt #}
{%- set tools_in_user_message = true %}
{%- endif %}
{#- date_string defaults to the current date when strftime_now is available, otherwise to a fixed date. #}
{%- if not date_string is defined %}
{%- if strftime_now is defined %}
{%- set date_string = strftime_now("%d %b %Y") %}
{%- else %}
{%- set date_string = "26 Jul 2024" %}
{%- endif %}
{%- endif %}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
{#- For list-style content only the text of the first part is used. #}
{%- if messages[0]['content'] is string %}
{%- set system_message = messages[0]['content']|trim %}
{%- else %}
{%- set system_message = messages[0]['content'][0]['text']|trim %}
{%- endif %}
{%- set messages = messages[1:] %}
{%- else %}
{#- Without a system message, a default tool-calling instruction is used when tools are present. #}
{%- if tools is not none %}
{%- set system_message = "You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question." %}
{%- else %}
{%- set system_message = "" %}
{%- endif %}
{%- endif %}
{#- System message #}
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if tools is not none %}
{{- "Environment: ipython\n" }}
{%- endif %}
{{- "Cutting Knowledge Date: December 2023\n" }}
{{- "Today Date: " + date_string + "\n\n" }}
{#- Tools are listed in the system turn only when tools_in_user_message is false. #}
{%- if tools is not none and not tools_in_user_message %}
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call. " }}
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
{{- "Do not use variables.\n\n" }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{%- endif %}
{{- system_message }}
{{- "<|eot_id|>" }}
{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
{#- Extract the first user message so we can plug it in here #}
{%- if messages | length != 0 %}
{#- For list-style content, the text parts are trimmed and joined with newlines. #}
{%- if messages[0]['content'] is string %}
{%- set first_user_message = messages[0]['content']|trim %}
{%- else %}
{%- set first_user_message = messages[0]['content'] | selectattr('type', 'equalto', 'text') | map(attribute='text') | map('trim') | join('\n') %}
{%- endif %}
{%- set messages = messages[1:] %}
{%- else %}
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
{%- endif %}
{{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
{{- "Given the following functions, please respond with a JSON for a function call " }}
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. ' }}
{{- "Do not use variables.\n\n" }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{{- first_user_message + "<|eot_id|>"}}
{%- endif %}
{#- Remaining messages: plain turns, assistant tool calls, and tool / ipython results. #}
{%- for message in messages %}
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
{#- List-style content prints its text parts back to back; other part types are skipped. #}
{%- if message['content'] is string %}
{{- message['content'] | trim}}
{%- else %}
{%- for content in message['content'] %}
{%- if content['type'] == 'text' %}
{{- content['text'] | trim }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- '<|eot_id|>' }}
{#- A message with tool_calls must contain exactly one call; it is rendered as an
    assistant turn holding a JSON object with "name" and "parameters". #}
{%- elif 'tool_calls' in message %}
{%- if not message.tool_calls|length == 1 %}
{{- raise_exception("This model only supports single tool-calls at once!") }}
{%- endif %}
{%- set tool_call = message.tool_calls[0].function %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
{{- '{"name": "' + tool_call.name + '", ' }}
{{- '"parameters": ' }}
{{- tool_call.arguments | tojson }}
{{- "}" }}
{{- "<|eot_id|>" }}
{#- Tool results are rendered in an ipython turn, each wrapped as a JSON object with an "output" key. #}
{%- elif message.role == "tool" or message.role == "ipython" %}
{{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
{%- if message.content is string %}
{{- { "output": message.content } | tojson }}
{%- else %}
{%- for content in message['content'] %}
{%- if content['type'] == 'text' %}
{{- { "output": content['text'] } | tojson }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- "<|eot_id|>" }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif %}

Some files were not shown because too many files have changed in this diff Show More