[TRTC-121] [feat] Add recipe selector UI to complement the recipe database (#10125)

Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
This commit is contained in:
Venky 2025-12-25 10:26:54 +05:30 committed by GitHub
parent a9eb5afc9f
commit c059e6caa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 1124 additions and 71 deletions

View File

@ -0,0 +1,76 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import sys
from pathlib import Path
from docutils import nodes
from docutils.parsers.rst import Directive, directives
from sphinx.util import logging
# Module-level logger, namespaced to this extension via Sphinx's logging util.
LOGGER = logging.getLogger(__name__)
class TRTLLMConfigSelector(Directive):
    """Embed the interactive config selector widget.

    Emits an empty marker ``<div>`` that ``config_selector.js`` discovers
    (via the ``data-trtllm-config-selector`` attribute) and hydrates at
    page-load time.

    Options:
        models: optional comma-separated allow-list of model names shown
            in the widget.
        config_db: optional URL/path of the JSON database the widget loads.
    """

    has_content = False
    option_spec = {
        "models": directives.unchanged,
        "config_db": directives.unchanged,
    }

    def run(self):
        """Return a single raw-HTML node carrying the widget's data attrs."""
        models = (self.options.get("models") or "").strip()
        config_db = (self.options.get("config_db") or "").strip()
        attrs = ['data-trtllm-config-selector="1"']
        # Fix: option values were previously interpolated into the attribute
        # unescaped, so a quote, "&", or "<" in the option text would break
        # the generated markup. html.escape(quote=True) keeps it well-formed.
        if models:
            attrs.append(f'data-models="{html.escape(models, quote=True)}"')
        if config_db:
            attrs.append(f'data-config-db="{html.escape(config_db, quote=True)}"')
        markup = f"<div {' '.join(attrs)}></div>"
        return [nodes.raw("", markup, format="html")]
def _ensure_repo_root_on_syspath() -> Path:
    """Make the repository root importable and return its path.

    The root is taken to be three directories above this file — presumably
    docs/source/_ext/ under the repo root (TODO confirm if the file moves).
    """
    repo_root = Path(__file__).resolve().parents[3]
    root_str = str(repo_root)
    if root_str not in sys.path:
        sys.path.insert(0, root_str)
    return repo_root
def _write_config_db_json(app) -> None:
builder = getattr(app, "builder", None)
if not builder:
return
if builder.name not in {"html", "dirhtml"}:
return
_ensure_repo_root_on_syspath()
from examples.configs.database.database import DATABASE_LIST_PATH
from scripts.generate_config_table import generate_json
out_static = Path(builder.outdir) / "_static"
out_static.mkdir(parents=True, exist_ok=True)
out_path = out_static / "config_db.json"
generate_json(Path(DATABASE_LIST_PATH), output_file=out_path)
LOGGER.info("Wrote config selector database: %s", out_path)
def _on_build_finished(app, exception) -> None:
if exception is not None:
return
_write_config_db_json(app)
def setup(app):
    """Sphinx extension entry point.

    Registers the widget's static assets, the ``trtllm_config_selector``
    directive, and a build-finished hook that writes config_db.json into
    the HTML output's _static directory.
    """
    app.add_css_file("config_selector.css")
    app.add_js_file("config_selector.js")
    app.add_directive("trtllm_config_selector", TRTLLMConfigSelector)
    # Generate config_db.json into the HTML output _static directory at build time.
    app.connect("build-finished", _on_build_finished)
    return {
        "version": "0.1",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }

View File

@ -40,6 +40,6 @@
.. note::
The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, refer to the :ref:`Comprehensive Configuration Database` section below which covers a larger set of traffic patterns and performance profiles.
The configs here are specifically optimized for a target ISL/OSL (Input/Output Sequence Length) of 1024/1024. If your traffic pattern is different, refer to the :ref:`Preconfigured Recipes` section below which covers a larger set of traffic patterns and performance profiles.
.. end-note-quick-start-isl-osl

View File

@ -0,0 +1,153 @@
/* Card container for the whole widget (class added by config_selector.js). */
.trtllm-config-selector {
  border: 1px solid rgba(0, 0, 0, 0.08);
  border-radius: 10px;
  padding: 16px;
  margin: 16px 0;
}

/* Header area above the form. */
.trtllm-config-selector__header {
  margin-bottom: 12px;
}

.trtllm-config-selector__subtitle {
  font-size: 0.95rem;
  opacity: 0.8;
  margin-top: 4px;
}

/* Responsive grid of labeled <select> fields. */
.trtllm-config-selector__form {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
  gap: 12px;
  margin-top: 12px;
}

.trtllm-config-selector__label {
  display: flex;
  align-items: center;
  gap: 8px;
  font-size: 0.85rem;
  margin-bottom: 6px;
  opacity: 0.9;
}

/* Small numbered circle ("1".."5") in front of each label. */
.trtllm-config-selector__step {
  flex: 0 0 auto;
  width: 16px;
  height: 16px;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  border-radius: 999px;
  font-size: 0.68rem;
  font-weight: 600;
  letter-spacing: 0.01em;
  border: 1px solid rgba(127, 127, 127, 0.18);
  background: rgba(127, 127, 127, 0.06);
  color: inherit;
  opacity: 0.65;
}

.trtllm-config-selector__labelText {
  line-height: 1.2;
}

.trtllm-config-selector__select {
  width: 100%;
  padding: 8px 10px;
  border-radius: 8px;
  border: 1px solid rgba(0, 0, 0, 0.18);
  background: transparent;
}

/* Output area: generated command, meta line, YAML panel. */
.trtllm-config-selector__output {
  margin-top: 14px;
}

/* <pre> holding the generated trtllm-serve command. */
.trtllm-config-selector__cmd {
  margin: 0;
  padding: 10px 12px;
  border-radius: 10px;
  border: 1px solid rgba(0, 0, 0, 0.12);
  overflow-x: auto;
  white-space: pre-wrap;
  overflow-wrap: anywhere;
  position: relative;
  padding-right: 54px; /* room for inline copy button */
}

/* "Config: <link>" line under the command. */
.trtllm-config-selector__meta {
  margin-top: 8px;
  font-size: 0.9rem;
  opacity: 0.85;
}

/* Collapsible <details> panel showing the config YAML. */
.trtllm-config-selector__yamlDetails {
  margin-top: 12px;
}

.trtllm-config-selector__yamlSummary {
  cursor: pointer;
  font-weight: 600;
}

.trtllm-config-selector__yamlBox {
  margin-top: 10px;
}

.trtllm-config-selector__yamlPre {
  margin: 0;
  padding: 10px 12px;
  border-radius: 10px;
  border: 1px solid rgba(0, 0, 0, 0.12);
  overflow-x: auto;
  max-height: 520px;
  position: relative;
  padding-right: 54px; /* room for inline copy button */
}

/* Copy button pinned to the top-right corner of either <pre>. */
.trtllm-config-selector__copyInline {
  position: absolute;
  top: 8px;
  right: 8px;
  font-size: 0.85rem;
  padding: 6px 10px;
  border-radius: 10px;
  border: 1px solid rgba(0, 0, 0, 0.12);
  background: rgba(255, 255, 255, 0.9);
  cursor: pointer;
}

.trtllm-config-selector__copyInline:disabled {
  opacity: 0.5;
  cursor: not-allowed;
}

.trtllm-config-selector__copyInline:hover:not(:disabled) {
  background: rgba(255, 255, 255, 1);
}

.trtllm-config-selector__configLink {
  text-decoration: underline;
}

/* Token classes emitted by the minimal YAML highlighter in config_selector.js. */
.yaml-key {
  font-weight: 600;
}

.yaml-comment {
  opacity: 0.7;
}

.yaml-punct,
.yaml-bool,
.yaml-num,
.yaml-str {
  opacity: 0.9;
}

/* Guidance / error text under the form. */
.trtllm-config-selector__error {
  margin-top: 10px;
  font-size: 0.9rem;
  opacity: 0.85;
}

View File

@ -0,0 +1,591 @@
(function () {
  "use strict";
  // Shared, module-wide promise for the config database fetch (see loadDb).
  let dbPromise = null;
  // Monotonic counter used to mint unique DOM ids for each widget instance.
  let widgetId = 0;
function $(root, sel) {
return root.querySelector(sel);
}
function el(tag, attrs = {}, children = []) {
const node = document.createElement(tag);
for (const [k, v] of Object.entries(attrs)) {
if (k === "class") node.className = String(v);
else if (k === "text") node.textContent = String(v);
else if (k.startsWith("data-")) node.setAttribute(k, String(v));
else if (k === "for") node.htmlFor = String(v);
else node.setAttribute(k, String(v));
}
for (const c of children) node.appendChild(c);
return node;
}
function uniqBy(arr, keyFn) {
const seen = new Set();
const out = [];
for (const x of arr) {
const k = keyFn(x);
if (!seen.has(k)) {
seen.add(k);
out.push(x);
}
}
return out;
}
function sortStrings(a, b) {
return String(a).localeCompare(String(b));
}
function sortNums(a, b) {
return Number(a) - Number(b);
}
async function loadDb(dbUrl) {
if (!dbPromise) {
dbPromise = fetch(dbUrl, { credentials: "same-origin" }).then((r) => {
if (!r.ok) {
throw new Error(`Failed to load config DB (${r.status}): ${dbUrl}`);
}
return r.json();
});
}
return dbPromise;
}
function defaultDbUrl() {
const scriptEl = document.querySelector('script[src*="config_selector.js"]');
if (scriptEl && scriptEl.src) {
const u = new URL(scriptEl.src, document.baseURI);
u.pathname = u.pathname.replace(/config_selector\.js$/, "config_db.json");
u.search = "";
u.hash = "";
return u.toString();
}
return new URL("_static/config_db.json", document.baseURI).toString();
}
async function copyText(text) {
if (navigator.clipboard && navigator.clipboard.writeText) {
await navigator.clipboard.writeText(text);
return;
}
const ta = el("textarea", { "aria-hidden": "true" });
ta.value = text;
ta.style.position = "fixed";
ta.style.left = "-9999px";
document.body.appendChild(ta);
ta.select();
document.execCommand("copy");
document.body.removeChild(ta);
}
function escapeHtml(s) {
return String(s)
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;")
.replaceAll('"', "&quot;")
.replaceAll("'", "&#039;");
}
function highlightYaml(yamlText) {
const lines = String(yamlText).split("\n");
const out = [];
function highlightScalar(raw) {
const m = String(raw).match(/^(\s*)(.*?)(\s*)$/);
const lead = m ? m[1] : "";
const core = m ? m[2] : String(raw);
const trail = m ? m[3] : "";
const t = core.trim();
if (!t) return escapeHtml(raw);
const boolNull = /^(true|false|null|~)$/;
const num = /^-?\d+(\.\d+)?$/;
const dq = t.length >= 2 && t.startsWith('"') && t.endsWith('"');
const sq = t.length >= 2 && t.startsWith("'") && t.endsWith("'");
if (boolNull.test(t)) {
return `${escapeHtml(lead)}<span class="yaml-bool">${escapeHtml(core)}</span>${escapeHtml(trail)}`;
}
if (num.test(t)) {
return `${escapeHtml(lead)}<span class="yaml-num">${escapeHtml(core)}</span>${escapeHtml(trail)}`;
}
if (dq || sq) {
return `${escapeHtml(lead)}<span class="yaml-str">${escapeHtml(core)}</span>${escapeHtml(trail)}`;
}
return escapeHtml(raw);
}
for (const line of lines) {
const hashIdx = line.indexOf("#");
const hasComment = hashIdx >= 0;
const codePart = hasComment ? line.slice(0, hashIdx) : line;
const commentPart = hasComment ? line.slice(hashIdx) : "";
const mList = codePart.match(/^(\s*)(-\s+)?(.*)$/);
const indent = mList ? mList[1] : "";
const dash = mList && mList[2] ? mList[2] : "";
const rest = mList ? mList[3] : codePart;
const idx = rest.indexOf(":");
let html = "";
if (idx >= 0) {
const keyRaw = rest.slice(0, idx);
const after = rest.slice(idx + 1);
html += escapeHtml(indent);
if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`;
html += `<span class="yaml-key">${escapeHtml(keyRaw.trimEnd())}</span>`;
html += `<span class="yaml-punct">:</span>`;
html += highlightScalar(after);
} else {
html += escapeHtml(indent);
if (dash) html += `<span class="yaml-punct">-</span>${escapeHtml(dash.slice(1))}`;
html += highlightScalar(rest);
}
if (commentPart) {
html += `<span class="yaml-comment">${escapeHtml(commentPart)}</span>`;
}
out.push(html);
}
return out.join("\n");
}
function formatCommand(entry) {
const model = entry.model || "";
const configPath = entry.config_path || "";
if (!model || !configPath) return entry.command || "";
return [
`trtllm-serve ${model} \\`,
` --config \${TRTLLM_DIR}/${configPath}`,
].join("\n");
}
function parseCsvModels(s) {
if (!s) return null;
const parts = String(s)
.split(",")
.map((x) => x.trim())
.filter(Boolean);
return parts.length ? parts : null;
}
function initOne(container, payload) {
const allowedModels = parseCsvModels(container.getAttribute("data-models"));
const allEntries = Array.isArray(payload.entries) ? payload.entries : [];
const entries = allowedModels
? allEntries.filter((e) => allowedModels.includes(e.model))
: allEntries.slice();
const modelsInfo = payload.models || {};
const state = {
model: "",
topology: "",
islOsl: "",
profile: "",
concurrency: "",
};
container.innerHTML = "";
container.classList.add("trtllm-config-selector");
const header = el("div", { class: "trtllm-config-selector__header" }, [
el("div", {
class: "trtllm-config-selector__subtitle",
text: "Select a model + deployment shape to generate a trtllm-serve command.",
}),
]);
const form = el("div", { class: "trtllm-config-selector__form" });
function mkSelect(labelText, id, stepNumber) {
const label = el("label", {
class: "trtllm-config-selector__label",
for: id,
});
label.appendChild(
el("span", {
class: "trtllm-config-selector__step",
"aria-hidden": "true",
text: String(stepNumber),
}),
);
label.appendChild(
el("span", {
class: "trtllm-config-selector__labelText",
text: labelText,
}),
);
const select = el("select", { class: "trtllm-config-selector__select", id });
const wrap = el("div", { class: "trtllm-config-selector__field" }, [label, select]);
return { wrap, select };
}
const id = ++widgetId;
const selModel = mkSelect("Model", `trtllm-model-${id}`, 1);
const selTopo = mkSelect("GPU(s)", `trtllm-topo-${id}`, 2);
const selSeq = mkSelect("ISL / OSL", `trtllm-seq-${id}`, 3);
const selProf = mkSelect("Performance profile", `trtllm-prof-${id}`, 4);
const selConc = mkSelect("Concurrency", `trtllm-conc-${id}`, 5);
form.appendChild(selModel.wrap);
form.appendChild(selTopo.wrap);
form.appendChild(selSeq.wrap);
form.appendChild(selProf.wrap);
form.appendChild(selConc.wrap);
const output = el("div", { class: "trtllm-config-selector__output" });
const cmdPre = el("pre", { class: "trtllm-config-selector__cmd" }, [
el("code", { class: "trtllm-config-selector__cmdcode", text: "" }),
]);
const cmdCopyBtn = el("button", {
class: "trtllm-config-selector__copyInline",
type: "button",
title: "Copy command",
"aria-label": "Copy command",
text: "Copy",
});
const meta = el("div", { class: "trtllm-config-selector__meta", text: "" });
output.appendChild(cmdPre);
output.appendChild(meta);
cmdPre.appendChild(cmdCopyBtn);
const yamlDetails = el("details", { class: "trtllm-config-selector__yamlDetails" }, [
el("summary", { class: "trtllm-config-selector__yamlSummary", text: "Show config YAML" }),
]);
const yamlBox = el("div", { class: "trtllm-config-selector__yamlBox" });
const yamlPre = el("pre", { class: "trtllm-config-selector__yamlPre" }, [
el("code", { class: "trtllm-config-selector__yamlCode", text: "" }),
]);
const yamlCopyBtn = el("button", {
class: "trtllm-config-selector__copyInline",
type: "button",
title: "Copy YAML",
"aria-label": "Copy YAML",
text: "Copy",
});
yamlBox.appendChild(yamlPre);
yamlDetails.appendChild(yamlBox);
output.appendChild(yamlDetails);
yamlPre.appendChild(yamlCopyBtn);
const errorBox = el("div", { class: "trtllm-config-selector__error", text: "" });
container.appendChild(header);
container.appendChild(form);
container.appendChild(output);
container.appendChild(errorBox);
const yamlCache = new Map();
let currentEntry = null;
let currentYamlText = "";
const yamlCodeEl = $(yamlPre, "code");
async function fetchYamlFor(entry) {
const url = entry.config_raw_url || "";
if (!url) return null;
if (yamlCache.has(url)) return yamlCache.get(url) || "";
const r = await fetch(url, { credentials: "omit" });
if (!r.ok) throw new Error(`Failed to fetch YAML (${r.status}): ${url}`);
const txt = await r.text();
yamlCache.set(url, txt);
return txt;
}
function resetYamlPanel() {
yamlDetails.open = false;
yamlDetails.dataset.state = "idle";
yamlCodeEl.textContent = "";
yamlCopyBtn.disabled = true;
currentYamlText = "";
}
resetYamlPanel();
yamlDetails.addEventListener("toggle", async () => {
if (!yamlDetails.open) return;
if (!currentEntry) {
yamlDetails.dataset.state = "idle";
yamlCodeEl.textContent = "Select a configuration above to view its YAML.";
return;
}
if (yamlDetails.dataset.state === "loaded") return;
if (yamlDetails.dataset.state === "loading") return;
const e = currentEntry;
if (!e.config_raw_url) {
yamlDetails.dataset.state = "error";
yamlCodeEl.textContent = "No raw URL available for this config.";
return;
}
yamlDetails.dataset.state = "loading";
yamlCodeEl.textContent = `Loading YAML from ${e.config_raw_url}`;
try {
const txt = await fetchYamlFor(e);
currentYamlText = txt || "";
yamlDetails.dataset.state = "loaded";
yamlCodeEl.innerHTML = highlightYaml(currentYamlText);
yamlCopyBtn.disabled = !currentYamlText;
} catch (err) {
yamlDetails.dataset.state = "error";
yamlCopyBtn.disabled = true;
yamlCodeEl.textContent = `Failed to load YAML.\n\n${String(err)}`;
}
});
yamlCopyBtn.addEventListener("click", async () => {
const txt = currentYamlText || yamlCodeEl.textContent || "";
if (!txt) return;
try {
await copyText(txt);
yamlCopyBtn.textContent = "Copied";
setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1200);
} catch (_) {
yamlCopyBtn.textContent = "Copy failed";
setTimeout(() => (yamlCopyBtn.textContent = "Copy"), 1500);
}
});
function setSelectOptions(select, options, value, placeholder) {
select.innerHTML = "";
select.appendChild(el("option", { value: "", text: placeholder || "Select…" }));
for (const opt of options) {
select.appendChild(el("option", { value: opt.value, text: opt.label }));
}
select.value = value || "";
select.disabled = options.length === 0;
}
function filteredByState(prefixOnly = false) {
return entries.filter((e) => {
if (state.model && e.model !== state.model) return false;
if (state.topology) {
const [ng, gpu] = state.topology.split("|");
if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false;
}
if (state.islOsl) {
const [isl, osl] = state.islOsl.split("|");
if (String(e.isl) !== isl || String(e.osl) !== osl) return false;
}
if (!prefixOnly && state.profile && e.performance_profile !== state.profile) return false;
if (!prefixOnly && state.concurrency && String(e.concurrency) !== state.concurrency) return false;
return true;
});
}
function render() {
errorBox.textContent = "";
// Model options
const modelOpts = uniqBy(
entries.map((e) => e.model),
(m) => m
)
.sort(sortStrings)
.map((m) => {
const info = modelsInfo[m];
const label = info && info.display_name ? `${info.display_name} (${m})` : m;
return { value: m, label };
});
if (state.model && !modelOpts.some((o) => o.value === state.model)) state.model = "";
if (!state.model && modelOpts.length === 1) state.model = modelOpts[0].value;
setSelectOptions(selModel.select, modelOpts, state.model, "Select a model…");
// GPU(s) options
const topoEntries = entries.filter((e) => !state.model || e.model === state.model);
const topoOpts = uniqBy(
topoEntries.map((e) => ({
value: `${e.num_gpus}|${e.gpu}`,
label: e.gpu_display || `${e.num_gpus}x${e.gpu}`,
num_gpus: e.num_gpus,
gpu: e.gpu,
})),
(o) => o.value
)
.sort((a, b) => sortNums(a.num_gpus, b.num_gpus) || sortStrings(a.gpu, b.gpu));
if (state.topology && !topoOpts.some((o) => o.value === state.topology)) state.topology = "";
if (!state.topology && topoOpts.length === 1) state.topology = topoOpts[0].value;
setSelectOptions(selTopo.select, topoOpts, state.topology, "Select GPU(s)…");
// ISL/OSL options
const seqEntries = entries.filter((e) => {
if (state.model && e.model !== state.model) return false;
if (state.topology) {
const [ng, gpu] = state.topology.split("|");
if (String(e.num_gpus) !== ng || e.gpu !== gpu) return false;
}
return true;
});
const seqOpts = uniqBy(
seqEntries.map((e) => ({
value: `${e.isl}|${e.osl}`,
label: `${e.isl} / ${e.osl}`,
isl: e.isl,
osl: e.osl,
})),
(o) => o.value
).sort((a, b) => sortNums(a.isl, b.isl) || sortNums(a.osl, b.osl));
if (state.islOsl && !seqOpts.some((o) => o.value === state.islOsl)) state.islOsl = "";
if (!state.islOsl && seqOpts.length === 1) state.islOsl = seqOpts[0].value;
setSelectOptions(selSeq.select, seqOpts, state.islOsl, "Select ISL/OSL…");
// Profile options
const prefEntries = filteredByState(true);
const profOpts = uniqBy(
prefEntries.map((e) => e.performance_profile),
(p) => p
)
.sort(sortStrings)
.map((p) => ({ value: p, label: p }));
if (state.profile && !profOpts.some((o) => o.value === state.profile)) state.profile = "";
if (!state.profile && profOpts.length === 1) state.profile = profOpts[0].value;
// Prefer Balanced if present (nicer default).
if (!state.profile && profOpts.some((o) => o.value === "Balanced")) state.profile = "Balanced";
setSelectOptions(selProf.select, profOpts, state.profile, "Select a profile…");
// Concurrency options (filtered by profile if chosen)
const profEntries2 = filteredByState(true).filter((e) => !state.profile || e.performance_profile === state.profile);
const concOpts = uniqBy(
profEntries2.map((e) => ({ value: String(e.concurrency), label: String(e.concurrency), conc: e.concurrency })),
(o) => o.value
).sort((a, b) => sortNums(a.conc, b.conc));
if (state.concurrency && !concOpts.some((o) => o.value === state.concurrency)) state.concurrency = "";
if (!state.concurrency && concOpts.length === 1) state.concurrency = concOpts[0].value;
setSelectOptions(selConc.select, concOpts, state.concurrency, "Select concurrency…");
// Resolve final selection
const finalEntries = filteredByState(false).filter((e) => {
if (state.profile && e.performance_profile !== state.profile) return false;
if (state.concurrency && String(e.concurrency) !== state.concurrency) return false;
return true;
});
const code = cmdPre.querySelector("code");
if (finalEntries.length === 1) {
const e = finalEntries[0];
code.textContent = formatCommand(e);
cmdCopyBtn.disabled = !code.textContent;
meta.textContent = "";
meta.appendChild(el("span", { text: "Config: " }));
const cfgHref = e.config_github_url || e.config_raw_url || "";
if (cfgHref) {
meta.appendChild(
el("a", {
class: "trtllm-config-selector__configLink",
href: cfgHref,
target: "_blank",
rel: "noopener",
text: e.config_path || cfgHref,
})
);
} else {
meta.appendChild(el("span", { text: e.config_path || "" }));
}
currentEntry = e;
resetYamlPanel();
} else {
code.textContent = "";
cmdCopyBtn.disabled = true;
meta.textContent = "";
currentEntry = null;
resetYamlPanel();
if (entries.length === 0) {
errorBox.textContent = "No configuration entries available for this page.";
} else if (state.model && topoOpts.length === 0) {
errorBox.textContent = "No matching topologies for this model.";
} else if (state.topology && seqOpts.length === 0) {
errorBox.textContent = "No matching ISL/OSL options for this selection.";
} else if (state.islOsl && profOpts.length === 0) {
errorBox.textContent = "No matching performance profiles for this selection.";
} else if (state.profile && concOpts.length === 0) {
errorBox.textContent = "No matching concurrencies for this profile.";
} else if (state.model && state.topology && state.islOsl && state.profile && state.concurrency) {
errorBox.textContent = "Selection did not resolve to a single configuration.";
} else {
errorBox.textContent = "Select options above to generate a command.";
}
}
}
selModel.select.addEventListener("change", () => {
state.model = selModel.select.value;
state.topology = "";
state.islOsl = "";
state.profile = "";
state.concurrency = "";
render();
});
selTopo.select.addEventListener("change", () => {
state.topology = selTopo.select.value;
state.islOsl = "";
state.profile = "";
state.concurrency = "";
render();
});
selSeq.select.addEventListener("change", () => {
state.islOsl = selSeq.select.value;
state.profile = "";
state.concurrency = "";
render();
});
selProf.select.addEventListener("change", () => {
state.profile = selProf.select.value;
state.concurrency = "";
render();
});
selConc.select.addEventListener("change", () => {
state.concurrency = selConc.select.value;
render();
});
cmdCopyBtn.addEventListener("click", async () => {
const code = $(cmdPre, "code");
const txt = (code && code.textContent) || "";
if (!txt) return;
try {
await copyText(txt);
cmdCopyBtn.textContent = "Copied";
setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1200);
} catch (e) {
cmdCopyBtn.textContent = "Copy failed";
setTimeout(() => (cmdCopyBtn.textContent = "Copy"), 1500);
}
});
render();
}
async function main() {
const containers = Array.from(document.querySelectorAll("[data-trtllm-config-selector]"));
if (!containers.length) return;
const first = containers[0];
const dbPath = first.getAttribute("data-config-db");
const dbUrl = dbPath
? new URL(dbPath, document.baseURI).toString()
: defaultDbUrl();
try {
const payload = await loadDb(dbUrl);
for (const c of containers) initOne(c, payload);
} catch (err) {
for (const c of containers) {
c.textContent = `Failed to load configuration database: ${String(err)}`;
}
}
}
  // Run after the DOM is parsed; if the script loaded late (readyState is
  // already "interactive"/"complete"), run immediately.
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", main);
  } else {
    main();
  }
})();

View File

@ -15,6 +15,7 @@ import pygit2
from docutils import nodes
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('_ext'))
project = 'TensorRT LLM'
copyright = '2025, NVidia'
@ -43,6 +44,13 @@ version = version_module.__version__
templates_path = ['_templates']
exclude_patterns = ['performance/performance-tuning-guide/introduction.md']
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
CPP_XML_INDEX = os.path.abspath(
os.path.join(SCRIPT_DIR, "..", "cpp_docs", "xml", "index.xml"))
HAS_CPP_XML = os.path.exists(CPP_XML_INDEX)
if not HAS_CPP_XML:
exclude_patterns.append('_cpp_gen/**')
extensions = [
'sphinx.ext.duration',
'sphinx.ext.autodoc',
@ -51,7 +59,6 @@ extensions = [
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
'myst_parser', # for markdown support
"breathe",
'sphinx.ext.todo',
'sphinx.ext.autosectionlabel',
'sphinxarg.ext',
@ -59,8 +66,12 @@ extensions = [
'sphinx_copybutton',
'sphinxcontrib.autodoc_pydantic',
'sphinx_togglebutton',
'trtllm_config_selector',
]
if HAS_CPP_XML:
extensions.append("breathe")
autodoc_member_order = 'bysource'
autodoc_pydantic_model_show_json = True
autodoc_pydantic_model_show_config_summary = True
@ -140,12 +151,11 @@ html_theme_options = {
]
}
# ------------------------ C++ Doc related --------------------------
# Breathe configuration
breathe_default_project = "TensorRT-LLM"
breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"}
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
if HAS_CPP_XML:
breathe_default_project = "TensorRT-LLM"
breathe_projects = {"TensorRT-LLM": "../cpp_docs/xml"}
else:
breathe_projects = {}
CPP_INCLUDE_DIR = os.path.join(SCRIPT_DIR, '../../cpp/include/tensorrt_llm')
CPP_GEN_DIR = os.path.join(SCRIPT_DIR, '_cpp_gen')
@ -206,10 +216,11 @@ Runtime
.. It is also doable to automatically generate this file and list all the modules in the conf.py
""".strip()
# compile cpp doc
subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime',
runtime_summary)
if HAS_CPP_XML:
# compile cpp doc
subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
gen_cpp_doc(CPP_GEN_DIR + '/runtime.rst', CPP_INCLUDE_DIR + '/runtime',
runtime_summary)
executor_summary = f"""
Executor
@ -220,6 +231,7 @@ Executor
.. It is also doable to automatically generate this file and list all the modules in the conf.py
""".strip()
subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor',
executor_summary)
if HAS_CPP_XML:
subprocess.run(['mkdir', '-p', CPP_GEN_DIR])
gen_cpp_doc(CPP_GEN_DIR + '/executor.rst', CPP_INCLUDE_DIR + '/executor',
executor_summary)

View File

@ -1,13 +1,15 @@
.. start-config-table-note
.. include:: ../_includes/note_sections.rst
:start-after: .. start-note-traffic-patterns
:end-before: .. end-note-traffic-patterns
.. end-config-table-note
.. start-deepseek-ai/DeepSeek-R1-0528
.. _deepseek-ai/DeepSeek-R1-0528:
`DeepSeek-R1 <https://huggingface.co/deepseek-ai/DeepSeek-R1-0528>`_
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. list-table::
:width: 100%
@ -148,7 +150,7 @@
.. _nvidia/DeepSeek-R1-0528-FP4-v2:
`DeepSeek-R1 (NVFP4) <https://huggingface.co/nvidia/DeepSeek-R1-0528-FP4-v2>`_
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. list-table::
:width: 100%
@ -337,7 +339,7 @@
.. _openai/gpt-oss-120b:
`gpt-oss-120b <https://huggingface.co/openai/gpt-oss-120b>`_
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. list-table::
:width: 100%

View File

@ -432,13 +432,24 @@ $$
## Preconfigured Recipes
The following tables list recommended configurations from the comprehensive database for different performance profiles.
The following sections help you pick a known-good `trtllm-serve --config` for your target GPU and traffic pattern.
### Recipe selector
```{eval-rst}
.. trtllm_config_selector::
:models: deepseek-ai/DeepSeek-R1-0528, nvidia/DeepSeek-R1-0528-FP4-v2
```
```{eval-rst}
.. include:: ../_includes/note_sections.rst
:start-after: .. start-note-traffic-patterns
:end-before: .. end-note-traffic-patterns
```
### Recipe database
```{eval-rst}
.. include:: config_table.rst
:start-after: .. start-deepseek-ai/DeepSeek-R1-0528
:end-before: .. end-deepseek-ai/DeepSeek-R1-0528

View File

@ -380,13 +380,24 @@ $$
## Preconfigured Recipes
The following table lists recommended configurations from the comprehensive database for different performance profiles.
The following sections help you pick a known-good `trtllm-serve --config` for your target GPU and traffic pattern.
### Recipe selector
```{eval-rst}
.. trtllm_config_selector::
:models: openai/gpt-oss-120b
```
```{eval-rst}
.. include:: ../_includes/note_sections.rst
:start-after: .. start-note-traffic-patterns
:end-before: .. end-note-traffic-patterns
```
### Recipe database
```{eval-rst}
.. include:: config_table.rst
:start-after: .. start-openai/gpt-oss-120b
:end-before: .. end-openai/gpt-oss-120b

View File

@ -100,9 +100,26 @@ The deployment guides below provide more detailed instructions for serving speci
deployment-guide-for-qwen3-next-on-trtllm.md
deployment-guide-for-kimi-k2-thinking-on-trtllm.md
Comprehensive Configuration Database
------------------------------------
Preconfigured Recipes
---------------------
.. _recipe-selector:
Recipe selector
^^^^^^^^^^^^^^^
.. trtllm_config_selector::
.. include:: ../_includes/note_sections.rst
:start-after: .. start-note-traffic-patterns
:end-before: .. end-note-traffic-patterns
.. _recipe-database:
Recipe database
^^^^^^^^^^^^^^^
The table below lists all available pre-configured model scenarios in the TensorRT LLM configuration database. Each row represents a specific model, GPU, and performance profile combination with recommended request settings.
.. include:: config_table.rst
:start-after: .. end-config-table-note

View File

@ -14,15 +14,18 @@
# limitations under the License.
from __future__ import annotations
import json
import os
import sys
from collections import defaultdict
from dataclasses import asdict, dataclass
from pathlib import Path
SCRIPT_DIR = Path(__file__).parent.resolve()
REPO_ROOT = SCRIPT_DIR.parent
# Add repo root to path for examples.configs.database import
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
@ -48,27 +51,113 @@ MODEL_INFO = {
}
def generate_rst(yaml_path, output_file=None):
"""Generate RST table from YAML config database.
@dataclass(frozen=True)
class RecipeRow:
    """One flattened, display-ready recipe entry (a row of config_db.json)."""

    model: str  # model id as it appears in the recipe DB, e.g. "openai/gpt-oss-120b"
    model_display_name: str  # human-friendly name; falls back to the model id
    model_url: str  # model page URL from MODEL_INFO; "" when unknown
    gpu: str  # GPU product name
    num_gpus: int  # number of GPUs in the topology
    isl: int  # input sequence length the config targets
    osl: int  # output sequence length the config targets
    concurrency: int  # concurrent-request level the config targets
    config_path: str  # repo-relative path of the YAML config
    gpu_display: str  # e.g. "8xH100"; bare GPU name when num_gpus <= 1
    performance_profile: str  # label produced by assign_profile()
    command: str  # one-line trtllm-serve command using ${TRTLLM_DIR}
    config_filename: str  # basename of config_path
    config_github_url: str  # github.com blob URL (main branch)
    config_raw_url: str  # raw.githubusercontent.com URL (main branch)
Args:
yaml_path: Path to lookup.yaml (str or Path)
output_file: Optional output file path. If None, prints to stdout.
"""
def _model_display_and_url(model: str) -> tuple[str, str]:
    """Return ``(display_name, url)`` for *model*.

    Unknown models fall back to ``(model, "")`` so callers can always
    render something sensible.
    """
    info = MODEL_INFO.get(model)
    if info is None:
        return model, ""
    return info["display_name"], info["url"]
def build_rows(yaml_path) -> list[RecipeRow]:
    """Flatten the YAML recipe database into an ordered list of ``RecipeRow``.

    Rows are grouped by model, then by ``(gpu, num_gpus, isl, osl)``; within
    each subgroup the entries are sorted by concurrency so that
    ``assign_profile`` can label each position in the subgroup.

    Args:
        yaml_path: Path to the recipe database YAML (str or Path).

    Returns:
        Rows sorted by model name, then by the subgroup key, then concurrency.
    """
    recipe_list = RecipeList.from_yaml(Path(yaml_path))
    # Group by model -> (gpu, num_gpus, isl, osl) -> list of recipes
    model_groups = defaultdict(lambda: defaultdict(list))
    for recipe in recipe_list:
        key = (recipe.gpu, recipe.num_gpus, recipe.isl, recipe.osl)
        model_groups[recipe.model][key].append(recipe)
    rows: list[RecipeRow] = []
    sorted_models = sorted(model_groups.keys())
    for model in sorted_models:
        subgroups = model_groups[model]
        # Deterministic subgroup order; `or 0` guards None fields in the YAML.
        sorted_keys = sorted(
            subgroups.keys(),
            key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0), int(k[3] or 0)),
        )
        model_display_name, model_url = _model_display_and_url(model)
        for key in sorted_keys:
            entries = subgroups[key]
            # Profile assignment depends on concurrency rank within the subgroup.
            entries.sort(key=lambda x: x.concurrency)
            for idx, entry in enumerate(entries):
                gpu = entry.gpu
                num_gpus = entry.num_gpus
                # "8xH100" for multi-GPU topologies, bare GPU name otherwise.
                gpu_display = f"{num_gpus}x{gpu}" if num_gpus and num_gpus > 1 else gpu
                isl = entry.isl
                osl = entry.osl
                conc = entry.concurrency
                config_path = entry.config_path
                profile = assign_profile(len(entries), idx, conc)
                command = f"trtllm-serve {model} --config ${{TRTLLM_DIR}}/{config_path}"
                config_filename = os.path.basename(config_path)
                # NOTE(review): links assume the config exists on the `main`
                # branch of the upstream repo — confirm for release branches.
                config_github_url = (
                    f"https://github.com/NVIDIA/TensorRT-LLM/blob/main/{config_path}"
                )
                config_raw_url = (
                    f"https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/{config_path}"
                )
                rows.append(
                    RecipeRow(
                        model=model,
                        model_display_name=model_display_name,
                        model_url=model_url,
                        gpu=gpu,
                        num_gpus=num_gpus,
                        isl=isl,
                        osl=osl,
                        concurrency=conc,
                        config_path=config_path,
                        gpu_display=gpu_display,
                        performance_profile=profile,
                        command=command,
                        config_filename=config_filename,
                        config_github_url=config_github_url,
                        config_raw_url=config_raw_url,
                    )
                )
    return rows
def generate_rst(yaml_path, output_file=None):
rows = build_rows(yaml_path)
model_groups = defaultdict(list)
for row in rows:
model_groups[row.model].append(row)
lines = []
# Include note_sections.rst at the top (relative include for Sphinx)
lines.append(".. start-config-table-note")
lines.append(".. include:: ../_includes/note_sections.rst")
lines.append(" :start-after: .. start-note-traffic-patterns")
lines.append(" :end-before: .. end-note-traffic-patterns")
lines.append(".. end-config-table-note")
lines.append("")
sorted_models = sorted(model_groups.keys())
@ -77,16 +166,16 @@ def generate_rst(yaml_path, output_file=None):
lines.append(f".. start-{model}")
lines.append("")
if model in MODEL_INFO:
info = MODEL_INFO[model]
title_text = f"`{info['display_name']} <{info['url']}>`_"
model_display_name, model_url = _model_display_and_url(model)
if model_url:
title_text = f"`{model_display_name} <{model_url}>`_"
else:
title_text = model
lines.append(f".. _{model}:")
lines.append("")
lines.append(title_text)
lines.append("^" * len(title_text))
lines.append("~" * len(title_text))
lines.append("")
lines.append(".. list-table::")
@ -101,42 +190,25 @@ def generate_rst(yaml_path, output_file=None):
lines.append(" - Config")
lines.append(" - Command")
subgroups = model_groups[model]
sorted_keys = sorted(
subgroups.keys(),
key=lambda k: (str(k[0]), int(k[1] or 0), int(k[2] or 0), int(k[3] or 0)),
entries = sorted(
model_groups[model],
key=lambda r: (
str(r.gpu),
int(r.num_gpus or 0),
int(r.isl or 0),
int(r.osl or 0),
int(r.concurrency or 0),
),
)
for key in sorted_keys:
entries = subgroups[key]
entries.sort(key=lambda x: x.concurrency)
n = len(entries)
for idx, entry in enumerate(entries):
gpu = entry.gpu
num_gpus = entry.num_gpus
gpu_display = f"{num_gpus}x{gpu}" if num_gpus and num_gpus > 1 else gpu
isl = entry.isl
osl = entry.osl
conc = entry.concurrency
config_path = entry.config_path
profile = assign_profile(n, idx, conc)
full_config_path = config_path
command = f"trtllm-serve {model} --config ${{TRTLLM_DIR}}/{full_config_path}"
config_filename = os.path.basename(full_config_path)
github_url = f"https://github.com/NVIDIA/TensorRT-LLM/blob/main/{full_config_path}"
config_link = f"`{config_filename} <{github_url}>`_"
lines.append(f" * - {gpu_display}")
lines.append(f" - {profile}")
lines.append(f" - {isl} / {osl}")
lines.append(f" - {conc}")
lines.append(f" - {config_link}")
lines.append(f" - ``{command}``")
for row in entries:
config_link = f"`{row.config_filename} <{row.config_github_url}>`_"
lines.append(f" * - {row.gpu_display}")
lines.append(f" - {row.performance_profile}")
lines.append(f" - {row.isl} / {row.osl}")
lines.append(f" - {row.concurrency}")
lines.append(f" - {config_link}")
lines.append(f" - ``{row.command}``")
lines.append("")
lines.append(f".. end-{model}")
@ -146,15 +218,44 @@ def generate_rst(yaml_path, output_file=None):
if output_file:
with open(output_file, "w") as f:
f.write(output_text)
print(f"Generated table written to: {output_file}", file=sys.stderr)
else:
print(output_text)
def generate_json(yaml_path, output_file):
    """Serialize the recipe database to a JSON payload for the docs widget.

    Args:
        yaml_path: Path to lookup.yaml (str or Path).
        output_file: Destination path for the JSON file.
    """
    records = build_rows(yaml_path)

    # Record the yaml source repo-relative when possible so the payload is
    # stable across checkouts; fall back to the path as given otherwise.
    src = Path(yaml_path)
    try:
        source_label = str(src.relative_to(REPO_ROOT))
    except ValueError:
        source_label = str(src)

    # First occurrence of each model wins (rows are grouped by model anyway).
    model_index = {}
    for record in records:
        model_index.setdefault(
            record.model,
            {"display_name": record.model_display_name, "url": record.model_url},
        )

    document = {
        "source": source_label,
        "models": model_index,
        "entries": [asdict(record) for record in records],
    }
    with open(output_file, "w") as handle:
        json.dump(document, handle, indent=2, sort_keys=True)
        handle.write("\n")
if __name__ == "__main__":
    # Fix: drop the dead store `yaml_path = DATABASE_LIST_PATH` that was
    # immediately overwritten by the Path(...) assignment below.
    yaml_path = Path(DATABASE_LIST_PATH)
    # Fail fast with a clear message so CI failures point at the missing
    # data file rather than a traceback from deep inside the generators.
    if not yaml_path.exists():
        print(f"Error: YAML file not found at {yaml_path}", file=sys.stderr)
        sys.exit(1)
    output_path = REPO_ROOT / "docs/source/deployment-guide/config_table.rst"
    json_output_path = REPO_ROOT / "docs/source/_static/config_db.json"
    generate_rst(yaml_path, output_file=output_path)
    generate_json(yaml_path, output_file=json_output_path)

View File

@ -14,7 +14,9 @@
# limitations under the License.
import importlib.util
import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
@ -26,8 +28,11 @@ _spec = importlib.util.spec_from_file_location(
"generate_config_table", REPO_ROOT / "scripts" / "generate_config_table.py"
)
_module = importlib.util.module_from_spec(_spec)
sys.modules[_spec.name] = _module
_spec.loader.exec_module(_module)
generate_rst = _module.generate_rst
generate_json = _module.generate_json
RecipeList = _module.RecipeList
# Dynamically load generate_config_database_tests module without modifying sys.path
_db_spec = importlib.util.spec_from_file_location(
@ -35,6 +40,7 @@ _db_spec = importlib.util.spec_from_file_location(
REPO_ROOT / "scripts" / "generate_config_database_tests.py",
)
_db_module = importlib.util.module_from_spec(_db_spec)
sys.modules[_db_spec.name] = _db_module
_db_spec.loader.exec_module(_db_module)
generate_tests = _db_module.generate_tests
TEST_LIST_PATH = _db_module.TEST_LIST_PATH
@ -76,6 +82,79 @@ class TestConfigDatabaseSync(unittest.TestCase):
"Please run 'python3 scripts/generate_config_table.py' from the repo root to update it.",
)
def test_config_db_json_generation(self):
    """Round-trip check: config_db.json generation mirrors lookup.yaml.

    Generates the JSON payload into a temporary file and validates its shape
    and that its entries correspond 1:1 with lookup.yaml recipes, without
    relying on a committed JSON artifact.
    """
    self.assertIsNotNone(generate_json)
    self.assertIsNotNone(RecipeList)

    yaml_path = os.path.join(REPO_ROOT, "examples/configs/database/lookup.yaml")
    self.assertTrue(os.path.exists(yaml_path), f"YAML file not found: {yaml_path}")

    # Identity key for a recipe; every generated entry must match exactly one.
    remaining = {
        (
            r.model,
            r.gpu,
            int(r.num_gpus),
            int(r.isl),
            int(r.osl),
            int(r.concurrency),
            r.config_path,
        )
        for r in RecipeList.from_yaml(Path(yaml_path))
    }

    with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=True) as tmp:
        generate_json(yaml_path, output_file=tmp.name)
        tmp.seek(0)
        payload = json.load(tmp)

    self.assertEqual(
        payload.get("source"),
        "examples/configs/database/lookup.yaml",
        "Generated JSON 'source' field is unexpected.",
    )

    for entry in payload.get("entries") or []:
        key = (
            entry.get("model"),
            entry.get("gpu"),
            int(entry.get("num_gpus")),
            int(entry.get("isl")),
            int(entry.get("osl")),
            int(entry.get("concurrency")),
            entry.get("config_path"),
        )
        self.assertIn(
            key,
            remaining,
            f"Generated config_db.json contains an unexpected entry key: {key}",
        )
        # Consume the key so duplicates in the JSON are caught too.
        remaining.remove(key)

        command = entry.get("command") or ""
        self.assertIn("--config", command)
        self.assertNotIn("extra_llm_api_options", command)
        self.assertIn("${TRTLLM_DIR}/", command)

        config_path = entry.get("config_path") or ""
        self.assertTrue(config_path)
        self.assertTrue((entry.get("config_github_url") or "").endswith(config_path))
        self.assertTrue((entry.get("config_raw_url") or "").endswith(config_path))

    self.assertFalse(
        remaining,
        "Generated config_db.json is missing entries from lookup.yaml.",
    )
def test_config_database_tests_sync(self):
"""Test that config database test files are synchronized with lookup.yaml.