TensorRT-LLMs/llm-api/reference.html
Kaiyu Xie 5e96c9b208
Update gh-pages (#3393)
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
2025-04-09 11:09:18 +08:00

2543 lines
339 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en" data-content_root="../" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>API Reference &#8212; tensorrt_llm</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
this give us a css class that will be invisible only if js is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
<!-- So that users can add custom icons -->
<script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
<script src="../_static/copybutton.js?v=65e89d2a"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'llm-api/reference';</script>
<link rel="icon" href="../_static/favicon.png"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="LLM Examples Introduction" href="../llm-api-examples/index.html" />
<link rel="prev" title="API Introduction" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/nvidia-logo-horiz-rgb-blk-for-screen.svg" class="logo__image only-light" alt="tensorrt_llm - Home"/>
<img src="../_static/nvidia-logo-horiz-rgb-wht-for-screen.svg" class="logo__image only-dark pst-js-only" alt="tensorrt_llm - Home"/>
<p class="title logo__title">tensorrt_llm</p>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
<span class="fa-solid fa-outdent"></span>
</button>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
<a class="navbar-brand logo" href="../index.html">
<img src="../_static/nvidia-logo-horiz-rgb-blk-for-screen.svg" class="logo__image only-light" alt="tensorrt_llm - Home"/>
<img src="../_static/nvidia-logo-horiz-rgb-wht-for-screen.svg" class="logo__image only-dark pst-js-only" alt="tensorrt_llm - Home"/>
<p class="title logo__title">tensorrt_llm</p>
</a>
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__end">
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Table of Contents">
<p class="bd-links__title" role="heading" aria-level="1">Table of Contents</p>
<div class="bd-toc-item navbar-nav"><p aria-level="2" class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../quick-start-guide.html">Quick Start Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../key-features.html">Key Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../torch.html">PyTorch Backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="../release-notes.html">Release Notes</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Installation</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../installation/linux.html">Installing on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="../installation/build-from-source-linux.html">Building from Source Code on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="../installation/grace-hopper.html">Installing on Grace Hopper</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">LLM API</span></p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="index.html">API Introduction</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">API Reference</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">LLM API Examples</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="../llm-api-examples/index.html">LLM Examples Introduction</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_medusa_decoding.html">Generate Text Using Medusa Decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_multilora.html">Generate text with multiple LoRA adapters</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_async.html">Generate Text Asynchronously</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_distributed.html">Distributed LLM Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_logits_processor.html">Control generated text using logits post processor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_lookahead_decoding.html">Generate Text Using Lookahead Decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_quantization.html">Generation with Quantization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_async_streaming.html">Generate Text in Streaming</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_guided_decoding.html">Generate text with guided decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference.html">Generate text</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_customize.html">Generate text with customization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_auto_parallel.html">Automatic Parallelism with LLM</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../llm-api-examples/customization.html">Common Customizations</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../llm-api-examples/llm_api_examples.html">Examples</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_medusa_decoding.html">Generate Text Using Medusa Decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_multilora.html">Generate text with multiple LoRA adapters</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_async.html">Generate Text Asynchronously</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_distributed.html">Distributed LLM Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_logits_processor.html">Control generated text using logits post processor</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_lookahead_decoding.html">Generate Text Using Lookahead Decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_quantization.html">Generation with Quantization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_async_streaming.html">Generate Text in Streaming</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_guided_decoding.html">Generate text with guided decoding</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference.html">Generate text</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_inference_customize.html">Generate text with customization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../llm-api-examples/llm_auto_parallel.html">Automatic Parallelism with LLM</a></li>
</ul>
</details></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Model Definition API</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.layers.html">Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.functional.html">Functionals</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.models.html">Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.plugin.html">Plugin</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../python-api/tensorrt_llm.runtime.html">Runtime</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">C++ API</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/executor.html">Executor</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_gen/runtime.html">Runtime</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Command-Line Reference</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../commands/trtllm-build.html">trtllm-build</a></li>
<li class="toctree-l1"><a class="reference internal" href="../commands/trtllm-serve.html">trtllm-serve</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Architecture</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../architecture/overview.html">TensorRT-LLM Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/core-concepts.html">Model Definition</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/checkpoint.html">TensorRT-LLM Checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/workflow.html">TensorRT-LLM Build Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architecture/add-model.html">Adding a Model</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Advanced</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-attention.html">Multi-Head, Multi-Query, and Group-Query Attention</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/gpt-runtime.html">C++ GPT Runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/executor.html">Executor API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/graph-rewriting.html">Graph Rewriting Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/inference-request.html">Inference Request</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/lora.html">Run gpt-2b + LoRA using GptManager / cpp runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/expert-parallelism.html">Expert Parallelism in TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/kv-cache-reuse.html">KV cache reuse</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/speculative-decoding.html">Speculative Sampling</a></li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/disaggregated-service.html">Disaggregated-Service (experimental)</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Performance</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../performance/perf-overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../performance/perf-benchmarking.html">Benchmarking</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../performance/performance-tuning-guide/index.html">Performance Tuning Guide</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/benchmarking-default-performance.html">Benchmarking Default Performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/useful-build-time-flags.html">Useful Build-Time Flags</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.html">Tuning Max Batch Size and Max Num Tokens</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/deciding-model-sharding-strategy.html">Deciding Model Sharding Strategy</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/fp8-quantization.html">FP8 Quantization</a></li>
<li class="toctree-l2"><a class="reference internal" href="../performance/performance-tuning-guide/useful-runtime-flags.html">Useful Runtime Options</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../performance/perf-analysis.html">Performance Analysis</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Reference</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../reference/troubleshooting.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/support-matrix.html">Support Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/precision.html">Numerical Precision</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/memory.html">Memory Usage of TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Blogs</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/quantization-in-TRT-LLM.html">Speed up inference with SOTA quantization techniques in TRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/XQA-kernel.html">New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget</a></li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">API Reference</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="api-reference">
<h1>API Reference<a class="headerlink" href="#api-reference" title="Link to this heading">#</a></h1>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">LLM</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Path</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">PreTrainedTokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'auto'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'slow'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'auto'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">skip_tokenizer_init</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">trust_remote_code</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor_parallel_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'auto'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">revision</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_revision</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">speculative_model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>LLM class is the main class for running a LLM model.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> (<em>str</em><em> or </em><em>Path</em>) The model name or a local model directory.
Note that if the value could be both a model name or a local model directory,
the local model directory will be prioritized.</p></li>
<li><p><strong>tokenizer</strong> (<em>str</em><em>, </em><em>Path</em><em>, </em><em>TokenizerBase</em><em>, </em><em>PreTrainedTokenizerBase</em><em>, </em><em>optional</em>) The name or path of a HuggingFace Transformers tokenizer, or the loaded tokenizer.
Defaults to None.</p></li>
<li><p><strong>tokenizer_mode</strong> (<em>Literal</em><em>[</em><em>'auto'</em><em>, </em><em>'slow'</em><em>]</em>) The tokenizer mode.
auto will use the fast tokenizer if available, and slow will always use the slow tokenizer.
The fast tokenizer is based on Huggingfaces Rust library tokenizers, which achieves a significant speed-up compared to its slow counterpart.
Defaults to auto.</p></li>
<li><p><strong>skip_tokenizer_init</strong> (<em>bool</em>) If true, skip initialization of tokenizer and detokenizer.
LLM.generate and LLM.generate_async will accept prompt token ids as input only.
Defaults to False.</p></li>
<li><p><strong>trust_remote_code</strong> (<em>bool</em>) Whether to trust remote code when downloading model and tokenizer from Hugging Face. Defaults to False.</p></li>
<li><p><strong>tensor_parallel_size</strong> (<em>int</em>) The number of processes for tensor parallelism. Defaults to 1.</p></li>
<li><p><strong>dtype</strong> (<em>str</em>) The data type for the model weights and activations.
Can be “float16”, “bfloat16”, “float32”, or “auto”. If “auto”, the data type
will be automatically inferred from the source model. If the source data type
is “float32”, it will be converted to “float16”. Defaults to “auto”.</p></li>
<li><p><strong>revision</strong> (<em>str</em><em>, </em><em>optional</em>) The revision of the model to use. Defaults to None.</p></li>
<li><p><strong>tokenizer_revision</strong> (<em>str</em><em>, </em><em>optional</em>) The revision of the tokenizer to use. Defaults to None.</p></li>
<li><p><strong>pipeline_parallel_size</strong> (<em>int</em>) The pipeline parallel size. Defaults to 1.</p></li>
<li><p><strong>context_parallel_size</strong> (<em>int</em>) The context parallel size. Defaults to 1.</p></li>
<li><p><strong>load_format</strong> (<em>Literal</em><em>[</em><em>'auto'</em><em>, </em><em>'dummy'</em><em>]</em>) The format of the model weights to load.
* auto will try to load the weights from the provided checkpoint.
* dummy will initialize the weights with random values, which is mainly for profiling.
Defaults to auto.</p></li>
<li><p><strong>enable_tqdm</strong> (<em>bool</em>) Whether to display a progress bar during model building. Defaults to False.</p></li>
<li><p><strong>enable_lora</strong> (<em>bool</em>) Enable LoRA adapters. Defaults to False.</p></li>
<li><p><strong>max_lora_rank</strong> (<em>int</em><em>, </em><em>optional</em>) Maximum LoRA rank. If specified, it overrides <cite>build_config.lora_config.max_lora_rank</cite>. Defaults to None.</p></li>
<li><p><strong>max_loras</strong> (<em>int</em>) Maximum number of LoRA adapters to be stored in GPU memory. Defaults to 4.</p></li>
<li><p><strong>max_cpu_loras</strong> (<em>int</em>) Maximum number of LoRA adapters to be stored in CPU memory. Defaults to 4.</p></li>
<li><p><strong>enable_prompt_adapter</strong> (<em>bool</em>) Enable prompt adapters. Defaults to False.</p></li>
<li><p><strong>max_prompt_adapter_token</strong> (<em>int</em>) Maximum number of prompt adapter tokens. Defaults to 0.</p></li>
<li><p><strong>quant_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.QuantConfig" title="tensorrt_llm.llmapi.QuantConfig"><em>QuantConfig</em></a><em>, </em><em>optional</em>) The quantization configuration for the model. Defaults to None.</p></li>
<li><p><strong>calib_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.CalibConfig" title="tensorrt_llm.llmapi.CalibConfig"><em>CalibConfig</em></a><em>, </em><em>optional</em>) The calibration configuration for the model. Defaults to None.</p></li>
<li><p><strong>build_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.BuildConfig" title="tensorrt_llm.llmapi.BuildConfig"><em>BuildConfig</em></a><em>, </em><em>optional</em><em>)</em>) The build configuration for the model. Defaults to None.</p></li>
<li><p><strong>kv_cache_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.KvCacheConfig" title="tensorrt_llm.llmapi.KvCacheConfig"><em>KvCacheConfig</em></a><em>, </em><em>optional</em>) The key-value cache configuration for the model. Defaults to None.</p></li>
<li><p><strong>enable_chunked_prefill</strong> (<em>bool</em>) Whether to enable chunked prefill. Defaults to False.</p></li>
<li><p><strong>decoding_config</strong> (<em>DecodingConfig</em><em>, </em><em>optional</em>) The decoding configuration for the model. Defaults to None.</p></li>
<li><p><strong>guided_decoding_backend</strong> (<em>str</em><em>, </em><em>optional</em>) The guided decoding backend, currently supports xgrammar. Defaults to None.</p></li>
<li><p><strong>logits_post_processor_map</strong> (<em>Dict</em><em>[</em><em>str</em><em>, </em><em>Callable</em><em>]</em><em>, </em><em>optional</em>) A map of logit post-processing functions. Defaults to None.</p></li>
<li><p><strong>iter_stats_max_iterations</strong> (<em>int</em><em>, </em><em>optional</em>) The maximum number of iterations for iteration statistics. Defaults to None.</p></li>
<li><p><strong>request_stats_max_iterations</strong> (<em>int</em><em>, </em><em>optional</em>) The maximum number of iterations for request statistics. Defaults to None.</p></li>
<li><p><strong>workspace</strong> (<em>str</em><em>, </em><em>optional</em>) The directory to store intermediate files. Defaults to None.</p></li>
<li><p><strong>embedding_parallel_mode</strong> (<em>str</em>) The parallel mode for embeddings. Defaults to SHARDING_ALONG_VOCAB.</p></li>
<li><p><strong>auto_parallel</strong> (<em>bool</em>) Enable auto parallel mode. Defaults to False.</p></li>
<li><p><strong>auto_parallel_world_size</strong> (<em>int</em>) The MPI world size for auto parallel. Defaults to 1.</p></li>
<li><p><strong>moe_tensor_parallel_size</strong> (<em>int</em><em>, </em><em>optional</em>) The tensor parallel size for MoE modelss expert weights.</p></li>
<li><p><strong>moe_expert_parallel_size</strong> (<em>int</em><em>, </em><em>optional</em>) The expert parallel size for MoE modelss expert weights.</p></li>
<li><p><strong>fast_build</strong> (bool): Enable features for faster engine building.
This may cause some performance degradation and is currently incompatible with int8/int4 quantization.
Defaults to False.</p></li>
<li><p><strong>enable_build_cache</strong> (<em>bool</em><em>, </em><a class="reference internal" href="#tensorrt_llm.llmapi.BuildCacheConfig" title="tensorrt_llm.llmapi.BuildCacheConfig"><em>BuildCacheConfig</em></a><em>, </em><em>optional</em>) Whether to enable build caching for the model. Defaults to None.</p></li>
<li><p><strong>peft_cache_config</strong> (<em>PeftCacheConfig</em><em>, </em><em>optional</em>) The PEFT cache configuration for the model. Defaults to None.</p></li>
<li><p><strong>scheduler_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.SchedulerConfig" title="tensorrt_llm.llmapi.SchedulerConfig"><em>SchedulerConfig</em></a><em>, </em><em>optional</em>) The scheduler configuration for the model. Defaults to None.</p></li>
<li><p><strong>speculative_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.llmapi.LookaheadDecodingConfig"><em>LookaheadDecodingConfig</em></a><em> or </em><em>other speculative configurations</em><em>, </em><em>optional</em>) The speculative decoding configuration. Defaults to None.</p></li>
<li><p><strong>batching_type</strong> (<em>BatchingType</em><em>, </em><em>optional</em>) The batching type for the model. Defaults to None.</p></li>
<li><p><strong>normalize_log_probs</strong> (<em>bool</em>) Whether to normalize log probabilities for the model. Defaults to False.</p></li>
<li><p><strong>max_batch_size</strong> (<em>int</em><em>, </em><em>optional</em>) The maximum batch size for runtime. Defaults to None.</p></li>
<li><p><strong>max_num_tokens</strong> (<em>int</em><em>, </em><em>optional</em>) The maximum number of tokens for runtime. Defaults to None.</p></li>
<li><p><strong>extended_runtime_perf_knob_config</strong> (<em>ExtendedRuntimePerfKnobConfig</em><em>, </em><em>optional</em>) The extended runtime performance knob configuration for the model. Defaults to None.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Path</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">PreTrainedTokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'auto'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'slow'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'auto'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">skip_tokenizer_init</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">trust_remote_code</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tensor_parallel_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dtype</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'auto'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">revision</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_revision</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">speculative_model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.generate">
<span class="sig-name descname"><span class="pre">generate</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span><span class="p"><span class="pre">]</span></span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sampling_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.sampling_params.SamplingParams"><span class="pre">SamplingParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.sampling_params.SamplingParams"><span class="pre">SamplingParams</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_tqdm</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">LoRARequest</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">LoRARequest</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">prompt_adapter_request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">PromptAdapterRequest</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">PromptAdapterRequest</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">queries</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">Sequence</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.llm.RequestOutput"><span class="pre">RequestOutput</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.llm.RequestOutput"><span class="pre">RequestOutput</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM.generate"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.generate" title="Link to this definition">#</a></dt>
<dd><p>Generate output for the given prompts in the synchronous mode.
Synchronous generation accepts either single prompt or batched prompts.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> (<em>PromptInputs</em><em> or </em><em>Sequence</em><em>[</em><em>PromptInputs</em><em>]</em>) The prompt text or token ids.
it can be single prompt or batched prompts.</p></li>
<li><p><strong>sampling_params</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.llmapi.SamplingParams"><em>SamplingParams</em></a><em>, </em><em>List</em><em>[</em><a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.llmapi.SamplingParams"><em>SamplingParams</em></a><em>]</em><em>, </em><em>optional</em>) The sampling params for the
generation, a default one will be used if not provided. Defaults to None.</p></li>
<li><p><strong>use_tqdm</strong> (<em>bool</em>) Whether to use tqdm to display the progress bar. Defaults to True.</p></li>
<li><p><strong>lora_request</strong> (<em>LoRARequest</em><em>, </em><em>Sequence</em><em>[</em><em>LoRARequest</em><em>]</em><em>, </em><em>optional</em>) LoRA request to use for generation,
if any. Defaults to None.</p></li>
<li><p><strong>prompt_adapter_request</strong> (<em>PromptAdapterRequest</em><em>, </em><em>Sequence</em><em>[</em><em>PromptAdapterRequest</em><em>]</em><em>, </em><em>optional</em>) Prompt Adapter request to use for generation, if any. Defaults to None.</p></li>
<li><p><strong>queries</strong> (<em>PromptInputs</em><em> or </em><em>Sequence</em><em>[</em><em>PromptInputs</em><em>]</em>) The query text or token ids.
it can be single prompt or batched prompts. it is used for star attention to run long context tasks.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output data of the completion request to the LLM.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>Union[<a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.RequestOutput">RequestOutput</a>, List[<a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.RequestOutput">RequestOutput</a>]]</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.generate_async">
<span class="sig-name descname"><span class="pre">generate_async</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">inputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sampling_params</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.sampling_params.SamplingParams"><span class="pre">SamplingParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">LoRARequest</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">prompt_adapter_request</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">PromptAdapterRequest</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">streaming</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">queries</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TextPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">TokensPrompt</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.llm.RequestOutput"><span class="pre">RequestOutput</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM.generate_async"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.generate_async" title="Link to this definition">#</a></dt>
<dd><p>Generate output for the given prompt in the asynchronous mode.
Asynchronous generation accepts single prompt only.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>inputs</strong> (<em>PromptInputs</em>) The prompt text or token ids; it must be single prompt.</p></li>
<li><p><strong>sampling_params</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.llmapi.SamplingParams"><em>SamplingParams</em></a><em>, </em><em>optional</em>) The sampling params for the generation,
a default one will be used if not provided. Defaults to None.</p></li>
<li><p><strong>lora_request</strong> (<em>LoRARequest</em><em>, </em><em>optional</em>) LoRA request to use for generation, if any.
Defaults to None.</p></li>
<li><p><strong>prompt_adapter_request</strong> (<em>PromptAdapterRequest</em><em>, </em><em>optional</em>) Prompt Adapter request to
use for generation, if any. Defaults to None.</p></li>
<li><p><strong>streaming</strong> (<em>bool</em>) Whether to use the streaming mode for the generation. Defaults to
False.</p></li>
<li><p><strong>queries</strong> (<em>PromptInputs</em><em> or </em><em>Sequence</em><em>[</em><em>PromptInputs</em><em>]</em>) The query text or token ids.
it can be single prompt or batched prompts. it is used for star attention to run long context tasks.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>The output data of the completion request to the LLM.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p><a class="reference internal" href="#tensorrt_llm.llmapi.RequestOutput" title="tensorrt_llm.llmapi.RequestOutput">RequestOutput</a></p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.save">
<span class="sig-name descname"><span class="pre">save</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">engine_dir</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM.save"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.save" title="Link to this definition">#</a></dt>
<dd><p>Save the built engine to the given path.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>engine_dir</strong> (<em>str</em>) The path to save the engine.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>None</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.shutdown">
<span class="sig-name descname"><span class="pre">shutdown</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#LLM.shutdown"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.shutdown" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.tokenizer">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">tokenizer</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">TokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.tokenizer" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LLM.workspace">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">workspace</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Path</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.LLM.workspace" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.RequestOutput">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">RequestOutput</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">generation_result</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">GenerationResult</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">prompt</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">TokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#RequestOutput"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.RequestOutput" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">GenerationResult</span></code></p>
<p>The output data of a completion request to the LLM.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>request_id</strong> (<em>int</em>) The unique ID of the request.</p></li>
<li><p><strong>prompt</strong> (<em>str</em><em>, </em><em>optional</em>) The prompt string of the request.</p></li>
<li><p><strong>prompt_token_ids</strong> (<em>List</em><em>[</em><em>int</em><em>]</em>) The token ids of the prompt.</p></li>
<li><p><strong>outputs</strong> (<em>List</em><em>[</em><em>CompletionOutput</em><em>]</em>) The output sequences of the request.</p></li>
<li><p><strong>context_logits</strong> (<em>torch.Tensor</em><em>, </em><em>optional</em>) The logits on the prompt token ids.</p></li>
<li><p><strong>finished</strong> (<em>bool</em>) Whether the whole request is finished.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.RequestOutput.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">generation_result</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">GenerationResult</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">prompt</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">TokenizerBase</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#RequestOutput.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.RequestOutput.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.RequestOutput.handle_response">
<span class="sig-name descname"><span class="pre">handle_response</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">response</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm.html#RequestOutput.handle_response"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.RequestOutput.handle_response" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">GuidedDecodingParams</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">BaseModel</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">dict</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">regex</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">grammar</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">json_object</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/sampling_params.html#GuidedDecodingParams"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Guided decoding parameters for text generation. Only one of the fields could be effective.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>json</strong> (<em>str</em><em>, </em><em>BaseModel</em><em>, </em><em>dict</em><em>, </em><em>optional</em>) The generated text is amenable to json format with additional user-specified restrictions, namely schema. Defaults to None.</p></li>
<li><p><strong>regex</strong> (<em>str</em><em>, </em><em>optional</em>) The generated text is amenable to the user-specified regular expression. Defaults to None.</p></li>
<li><p><strong>grammar</strong> (<em>str</em><em>, </em><em>optional</em>) The generated text is amenable to the user-specified extended Backus-Naur form (EBNF) grammar. Defaults to None.</p></li>
<li><p><strong>json_object</strong> (<em>bool</em>) If True, the generated text is amenable to json format. Defaults to False.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">json</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">BaseModel</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">dict</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">regex</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">grammar</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">json_object</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.grammar">
<span class="sig-name descname"><span class="pre">grammar</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.grammar" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.json">
<span class="sig-name descname"><span class="pre">json</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">BaseModel</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">dict</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.json" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.json_object">
<span class="sig-name descname"><span class="pre">json_object</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.json_object" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.num_guides">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">num_guides</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.num_guides" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.GuidedDecodingParams.regex">
<span class="sig-name descname"><span class="pre">regex</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.GuidedDecodingParams.regex" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">SamplingParams</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">end_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pad_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">32</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bad</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bad_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stop</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stop_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">include_stop_str_in_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">embedding_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tensor</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">external_draft_tokens_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ExternalDraftTokensConfig</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">logits_post_processor_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">best_of</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_beam_search</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_return_sequences</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_min</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_reset_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_decay</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">temperature</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">min_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">min_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beam_search_diversity_rate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">repetition_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">presence_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">frequency_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">length_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">early_stopping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">no_repeat_ngram_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_log_probs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_context_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_generation_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">exclude_input_from_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_encoder_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_perf_metrics</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">additional_model_outputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">AdditionalModelOutput</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lookahead_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.bindings.executor.LookaheadDecodingConfig"><span class="pre">LookaheadDecodingConfig</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">guided_decoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.GuidedDecodingParams" title="tensorrt_llm.sampling_params.GuidedDecodingParams"><span class="pre">GuidedDecodingParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">ignore_eos</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">detokenize</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">add_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">truncate_prompt_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">skip_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">spaces_between_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/sampling_params.html#SamplingParams"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Sampling parameters for text generation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>end_id</strong> (<em>int</em><em>, </em><em>optional</em>) The end token id. Defaults to None.</p></li>
<li><p><strong>pad_id</strong> (<em>int</em><em>, </em><em>optional</em>) The pad token id. Defaults to None.</p></li>
<li><p><strong>max_tokens</strong> (<em>int</em>) The maximum number of tokens to generate. Defaults to 32.</p></li>
<li><p><strong>max_new_tokens</strong> (<em>int</em><em>, </em><em>optional</em>) The maximum number of tokens to generate. This argument is being deprecated; please use max_tokens instead. Defaults to None.</p></li>
<li><p><strong>bad</strong> (<em>str</em><em>, </em><em>List</em><em>[</em><em>str</em><em>]</em><em>, </em><em>optional</em>) A string or a list of strings that redirect the generation when they are generated, so that the bad strings are excluded from the returned output. Defaults to None.</p></li>
<li><p><strong>bad_token_ids</strong> (<em>List</em><em>[</em><em>int</em><em>]</em><em>, </em><em>optional</em>) A list of token ids that redirect the generation when they are generated, so that the bad ids are excluded from the returned output. Defaults to None.</p></li>
<li><p><strong>stop</strong> (<em>str</em><em>, </em><em>List</em><em>[</em><em>str</em><em>]</em><em>, </em><em>optional</em>) A string or a list of strings that stop the generation when they are generated. The returned output will not contain the stop strings unless include_stop_str_in_output is True. Defaults to None.</p></li>
<li><p><strong>stop_token_ids</strong> (<em>List</em><em>[</em><em>int</em><em>]</em><em>, </em><em>optional</em>) A list of token ids that stop the generation when they are generated. Defaults to None.</p></li>
<li><p><strong>include_stop_str_in_output</strong> (<em>bool</em>) Whether to include the stop strings in output text. Defaults to False.</p></li>
<li><p><strong>embedding_bias</strong> (<em>torch.Tensor</em><em>, </em><em>optional</em>) The embedding bias tensor. Expected type is kFP32 and shape is [vocab_size]. Defaults to None.</p></li>
<li><p><strong>external_draft_tokens_config</strong> (<em>ExternalDraftTokensConfig</em><em>, </em><em>optional</em>) The speculative decoding configuration. Defaults to None.</p></li>
<li><p><strong>logits_post_processor_name</strong> (<em>str</em><em>, </em><em>optional</em>) The logits postprocessor name. Must correspond to one of the logits postprocessor name provided to the ExecutorConfig. Defaults to None.</p></li>
<li><p><strong>n</strong> (<em>int</em>) Number of sequences to generate. Defaults to 1.</p></li>
<li><p><strong>best_of</strong> (<em>int</em><em>, </em><em>optional</em>) Number of sequences to consider for best output. Defaults to None.</p></li>
<li><p><strong>use_beam_search</strong> (<em>bool</em>) Whether to use beam search. Defaults to False.</p></li>
<li><p><strong>beam_width</strong> (<em>int</em>) The beam width. Setting 1 disables beam search. This parameter will be deprecated from the LLM API in a future release. Please use n/best_of/use_beam_search instead. Defaults to 1.</p></li>
<li><p><strong>num_return_sequences</strong> (<em>int</em><em>, </em><em>optional</em>) The number of sequences to return. If set to None, it defaults to the value of <cite>beam_width</cite>. The default is None. This parameter will be deprecated from the LLM API in a future release. Please use n/best_of/use_beam_search instead. Defaults to None.</p></li>
<li><p><strong>top_k</strong> (<em>int</em>) Controls number of logits to sample from. Default is 0 (all logits).</p></li>
<li><p><strong>top_p</strong> (<em>float</em>) Controls the top-P probability to sample from. Default is 0.f</p></li>
<li><p><strong>top_p_min</strong> (<em>float</em>) Controls decay in the top-P algorithm. topPMin is lower-bound. Default is 1.e-6.</p></li>
<li><p><strong>top_p_reset_ids</strong> (<em>int</em>) Controls decay in the top-P algorithm. Indicates where to reset the decay. Default is 1.</p></li>
<li><p><strong>top_p_decay</strong> (<em>float</em>) Controls decay in the top-P algorithm. The decay value. Default is 1.f</p></li>
<li><p><strong>seed</strong> (<em>int</em>) Controls the random seed used by the random number generator in sampling</p></li>
<li><p><strong>random_seed</strong> (<em>int</em>) Controls the random seed used by the random number generator in sampling. This argument is being deprecated; please use seed instead.</p></li>
<li><p><strong>temperature</strong> (<em>float</em>) Controls the modulation of logits when sampling new tokens. It can have values &gt; 0.f. Default is 1.0f</p></li>
<li><p><strong>min_tokens</strong> (<em>int</em>) Lower bound on the number of tokens to generate. Values &lt; 1 have no effect. Default is 1.</p></li>
<li><p><strong>min_length</strong> (<em>int</em>) Lower bound on the number of tokens to generate. Values &lt; 1 have no effect. Default is 1. This argument is being deprecated; please use min_tokens instead.</p></li>
<li><p><strong>beam_search_diversity_rate</strong> (<em>float</em>) Controls the diversity in beam search.</p></li>
<li><p><strong>repetition_penalty</strong> (<em>float</em>) Used to penalize tokens based on how often they appear in the sequence. It can have any value &gt; 0.f. Values &lt; 1.f encourages repetition, values &gt; 1.f discourages it. Default is 1.f</p></li>
<li><p><strong>presence_penalty</strong> (<em>float</em>) Used to penalize tokens already present in the sequence (irrespective of the number of appearances). It can have any values. Values &lt; 0.f encourage repetition, values &gt; 0.f discourage it. Default is 0.f</p></li>
<li><p><strong>frequency_penalty</strong> (<em>float</em>) Used to penalize tokens already present in the sequence (dependent on the number of appearances). It can have any values. Values &lt; 0.f encourage repetition, values &gt; 0.f discourage it. Default is 0.f</p></li>
<li><p><strong>length_penalty</strong> (<em>float</em>) Controls how to penalize longer sequences in beam search. Default is 0.f</p></li>
<li><p><strong>early_stopping</strong> (<em>int</em>) Controls whether the generation process finishes once beamWidth sentences are generated (ends with end_token)</p></li>
<li><p><strong>no_repeat_ngram_size</strong> (<em>int</em>) Controls how many repeat ngram size are acceptable. Default is 1 &lt;&lt; 30.</p></li>
<li><p><strong>return_log_probs</strong> (<em>bool</em>) Controls if Result should contain log probabilities. Default is false.</p></li>
<li><p><strong>return_context_logits</strong> (<em>bool</em>) Controls if Result should contain the context logits. Default is false.</p></li>
<li><p><strong>return_generation_logits</strong> (<em>bool</em>) Controls if Result should contain the generation logits. Default is false.</p></li>
<li><p><strong>exclude_input_from_output</strong> (<em>bool</em>) Controls if output tokens in Result should include the input tokens. Default is true.</p></li>
<li><p><strong>return_encoder_output</strong> (<em>bool</em>) Controls if Result should contain encoder output hidden states (for encoder-only and encoder-decoder models). Default is false.</p></li>
<li><p><strong>return_perf_metrics</strong> (<em>bool</em>) Controls if Result should contain the performance metrics for this request. Default is false.</p></li>
<li><p><strong>additional_model_outputs</strong> (<em>list</em><em>[</em><em>AdditionalModelOutput</em><em>]</em><em>, </em><em>optional</em>) The additional outputs to gather from the model.</p></li>
<li><p><strong>lookahead_config</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.llmapi.LookaheadDecodingConfig"><em>LookaheadDecodingConfig</em></a><em> , </em><em>optional</em>) Lookahead decoding config. Defaults to None.</p></li>
<li><p><strong>guided_decoding</strong> (<a class="reference internal" href="#tensorrt_llm.llmapi.GuidedDecodingParams" title="tensorrt_llm.llmapi.GuidedDecodingParams"><em>GuidedDecodingParams</em></a><em>, </em><em>optional</em>) Guided decoding params. Defaults to None.</p></li>
<li><p><strong>ignore_eos</strong> (<em>bool</em>) Whether to ignore the EOS token and continue generating tokens after the EOS token is generated. Defaults to False.</p></li>
<li><p><strong>detokenize</strong> (<em>bool</em>) Whether to detokenize the output. Defaults to True.</p></li>
<li><p><strong>add_special_tokens</strong> (<em>bool</em>) Whether to add special tokens to the prompt. Defaults to True.</p></li>
<li><p><strong>truncate_prompt_tokens</strong> (<em>int</em><em>, </em><em>optional</em>) If set to an integer k, will use only the last k tokens from the prompt (i.e., left truncation). Defaults to None.</p></li>
<li><p><strong>skip_special_tokens</strong> (<em>bool</em>) Whether to skip special tokens in the output. Defaults to True.</p></li>
<li><p><strong>spaces_between_special_tokens</strong> (<em>bool</em>) Whether to add spaces between special tokens in the output. Defaults to True.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">end_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pad_id</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">32</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_new_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bad</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">bad_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stop</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">stop_token_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">include_stop_str_in_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">embedding_bias</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Tensor</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">external_draft_tokens_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ExternalDraftTokensConfig</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">logits_post_processor_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">n</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">best_of</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_beam_search</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beam_width</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_return_sequences</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_k</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_min</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_reset_ids</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">top_p_decay</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">temperature</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">min_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">min_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">beam_search_diversity_rate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">repetition_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">presence_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">frequency_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">length_penalty</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">early_stopping</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">no_repeat_ngram_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_log_probs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_context_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_generation_logits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">exclude_input_from_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_encoder_output</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">return_perf_metrics</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">additional_model_outputs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">AdditionalModelOutput</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lookahead_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.bindings.executor.LookaheadDecodingConfig"><span class="pre">LookaheadDecodingConfig</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">guided_decoding</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.GuidedDecodingParams" title="tensorrt_llm.sampling_params.GuidedDecodingParams"><span class="pre">GuidedDecodingParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">ignore_eos</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">detokenize</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">add_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">truncate_prompt_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">skip_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">spaces_between_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.add_special_tokens">
<span class="sig-name descname"><span class="pre">add_special_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.add_special_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.additional_model_outputs">
<span class="sig-name descname"><span class="pre">additional_model_outputs</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">AdditionalModelOutput</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.additional_model_outputs" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.bad">
<span class="sig-name descname"><span class="pre">bad</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.bad" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.bad_token_ids">
<span class="sig-name descname"><span class="pre">bad_token_ids</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.bad_token_ids" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate">
<span class="sig-name descname"><span class="pre">beam_search_diversity_rate</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.beam_width">
<span class="sig-name descname"><span class="pre">beam_width</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.beam_width" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.best_of">
<span class="sig-name descname"><span class="pre">best_of</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.best_of" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.detokenize">
<span class="sig-name descname"><span class="pre">detokenize</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.detokenize" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.early_stopping">
<span class="sig-name descname"><span class="pre">early_stopping</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.early_stopping" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.embedding_bias">
<span class="sig-name descname"><span class="pre">embedding_bias</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Tensor</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.embedding_bias" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.end_id">
<span class="sig-name descname"><span class="pre">end_id</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.end_id" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output">
<span class="sig-name descname"><span class="pre">exclude_input_from_output</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.external_draft_tokens_config">
<span class="sig-name descname"><span class="pre">external_draft_tokens_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">ExternalDraftTokensConfig</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.external_draft_tokens_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.frequency_penalty">
<span class="sig-name descname"><span class="pre">frequency_penalty</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.frequency_penalty" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.greedy_decoding">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">greedy_decoding</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.greedy_decoding" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.guided_decoding">
<span class="sig-name descname"><span class="pre">guided_decoding</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.llmapi.GuidedDecodingParams" title="tensorrt_llm.sampling_params.GuidedDecodingParams"><span class="pre">GuidedDecodingParams</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.guided_decoding" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.ignore_eos">
<span class="sig-name descname"><span class="pre">ignore_eos</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.ignore_eos" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output">
<span class="sig-name descname"><span class="pre">include_stop_str_in_output</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.length_penalty">
<span class="sig-name descname"><span class="pre">length_penalty</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.length_penalty" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.logits_post_processor_name">
<span class="sig-name descname"><span class="pre">logits_post_processor_name</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.logits_post_processor_name" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.lookahead_config">
<span class="sig-name descname"><span class="pre">lookahead_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.bindings.executor.LookaheadDecodingConfig"><span class="pre">LookaheadDecodingConfig</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.lookahead_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.max_new_tokens">
<span class="sig-name descname"><span class="pre">max_new_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.max_new_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.max_tokens">
<span class="sig-name descname"><span class="pre">max_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.max_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.min_length">
<span class="sig-name descname"><span class="pre">min_length</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.min_length" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.min_tokens">
<span class="sig-name descname"><span class="pre">min_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.min_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.n">
<span class="sig-name descname"><span class="pre">n</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.n" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size">
<span class="sig-name descname"><span class="pre">no_repeat_ngram_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.num_return_sequences">
<span class="sig-name descname"><span class="pre">num_return_sequences</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.num_return_sequences" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.pad_id">
<span class="sig-name descname"><span class="pre">pad_id</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.pad_id" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.presence_penalty">
<span class="sig-name descname"><span class="pre">presence_penalty</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.presence_penalty" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.random_seed">
<span class="sig-name descname"><span class="pre">random_seed</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.random_seed" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.repetition_penalty">
<span class="sig-name descname"><span class="pre">repetition_penalty</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.repetition_penalty" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.return_context_logits">
<span class="sig-name descname"><span class="pre">return_context_logits</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.return_context_logits" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.return_encoder_output">
<span class="sig-name descname"><span class="pre">return_encoder_output</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.return_encoder_output" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.return_generation_logits">
<span class="sig-name descname"><span class="pre">return_generation_logits</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.return_generation_logits" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.return_log_probs">
<span class="sig-name descname"><span class="pre">return_log_probs</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.return_log_probs" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.return_perf_metrics">
<span class="sig-name descname"><span class="pre">return_perf_metrics</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.return_perf_metrics" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.seed">
<span class="sig-name descname"><span class="pre">seed</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.seed" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.setup">
<span class="sig-name descname"><span class="pre">setup</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">add_special_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#tensorrt_llm.llmapi.SamplingParams" title="tensorrt_llm.sampling_params.SamplingParams"><span class="pre">SamplingParams</span></a></span></span><a class="reference internal" href="../_modules/tensorrt_llm/sampling_params.html#SamplingParams.setup"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.setup" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.skip_special_tokens">
<span class="sig-name descname"><span class="pre">skip_special_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.skip_special_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens">
<span class="sig-name descname"><span class="pre">spaces_between_special_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.stop">
<span class="sig-name descname"><span class="pre">stop</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.stop" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.stop_token_ids">
<span class="sig-name descname"><span class="pre">stop_token_ids</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.stop_token_ids" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.temperature">
<span class="sig-name descname"><span class="pre">temperature</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.temperature" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.top_k">
<span class="sig-name descname"><span class="pre">top_k</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.top_k" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.top_p">
<span class="sig-name descname"><span class="pre">top_p</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.top_p" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.top_p_decay">
<span class="sig-name descname"><span class="pre">top_p_decay</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.top_p_decay" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.top_p_min">
<span class="sig-name descname"><span class="pre">top_p_min</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.top_p_min" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.top_p_reset_ids">
<span class="sig-name descname"><span class="pre">top_p_reset_ids</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.top_p_reset_ids" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.truncate_prompt_tokens">
<span class="sig-name descname"><span class="pre">truncate_prompt_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.truncate_prompt_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SamplingParams.use_beam_search">
<span class="sig-name descname"><span class="pre">use_beam_search</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.SamplingParams.use_beam_search" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">KvCacheConfig</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pybind11_object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.KvCacheConfig" title="tensorrt_llm.bindings.executor.KvCacheConfig"><span class="pre">tensorrt_llm.bindings.executor.KvCacheConfig</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">enable_block_reuse</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_attention_window</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">sink_token_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">free_gpu_memory_fraction</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">host_cache_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">onboard_blocks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">cross_kv_cache_fraction</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">secondary_offload_min_priority</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">event_buffer_max_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">runtime_defaults</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tensorrt_llm.bindings.executor.RuntimeDefaults</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">cross_kv_cache_fraction</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">enable_block_reuse</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">event_buffer_max_size</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.fill_empty_fields_from_runtime_defaults">
<span class="sig-name descname"><span class="pre">fill_empty_fields_from_runtime_defaults</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.KvCacheConfig" title="tensorrt_llm.bindings.executor.KvCacheConfig"><span class="pre">tensorrt_llm.bindings.executor.KvCacheConfig</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">arg0</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tensorrt_llm.bindings.executor.RuntimeDefaults</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.fill_empty_fields_from_runtime_defaults" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">free_gpu_memory_fraction</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.host_cache_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">host_cache_size</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.host_cache_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.max_attention_window">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_attention_window</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.max_attention_window" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.max_tokens">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_tokens</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.max_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">onboard_blocks</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">secondary_offload_min_priority</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.KvCacheConfig.sink_token_length">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">sink_token_length</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.KvCacheConfig.sink_token_length" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">LookaheadDecodingConfig</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pybind11_object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.bindings.executor.LookaheadDecodingConfig"><span class="pre">tensorrt_llm.bindings.executor.LookaheadDecodingConfig</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_window_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_ngram_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_verification_set_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource">
<span class="sig-name descname"><span class="pre">calculate_speculative_resource</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig" title="tensorrt_llm.bindings.executor.LookaheadDecodingConfig"><span class="pre">tensorrt_llm.bindings.executor.LookaheadDecodingConfig</span></a></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_ngram_size</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_verification_set_size</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_window_size</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.MedusaDecodingConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">MedusaDecodingConfig</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">medusa_choices</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_medusa_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm_utils.html#MedusaDecodingConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.MedusaDecodingConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.MedusaDecodingConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">medusa_choices</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">num_medusa_heads</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices">
<span class="sig-name descname"><span class="pre">medusa_choices</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads">
<span class="sig-name descname"><span class="pre">num_medusa_heads</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SchedulerConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">SchedulerConfig</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SchedulerConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pybind11_object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SchedulerConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.SchedulerConfig" title="tensorrt_llm.bindings.executor.SchedulerConfig"><span class="pre">tensorrt_llm.bindings.executor.SchedulerConfig</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">capacity_scheduler_policy</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy" title="tensorrt_llm.bindings.executor.CapacitySchedulerPolicy"><span class="pre">tensorrt_llm.bindings.executor.CapacitySchedulerPolicy</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">CapacitySchedulerPolicy.GUARANTEED_NO_EVICT</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">context_chunking_policy</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tensorrt_llm.bindings.executor.ContextChunkingPolicy</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dynamic_batch_config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">tensorrt_llm.bindings.executor.DynamicBatchConfig</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SchedulerConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">capacity_scheduler_policy</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">context_chunking_policy</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">dynamic_batch_config</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">CapacitySchedulerPolicy</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">pybind11_object</span></code></p>
<p>Members:</p>
<p>MAX_UTILIZATION</p>
<p>GUARANTEED_NO_EVICT</p>
<p>STATIC_BATCH</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT">
<span class="sig-name descname"><span class="pre">GUARANTEED_NO_EVICT</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">&lt;CapacitySchedulerPolicy.GUARANTEED_NO_EVICT:</span> <span class="pre">1&gt;</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION">
<span class="sig-name descname"><span class="pre">MAX_UTILIZATION</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">&lt;CapacitySchedulerPolicy.MAX_UTILIZATION:</span> <span class="pre">0&gt;</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH">
<span class="sig-name descname"><span class="pre">STATIC_BATCH</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">&lt;CapacitySchedulerPolicy.STATIC_BATCH:</span> <span class="pre">2&gt;</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">self</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy" title="tensorrt_llm.bindings.executor.CapacitySchedulerPolicy"><span class="pre">tensorrt_llm.bindings.executor.CapacitySchedulerPolicy</span></a></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.name">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">name</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.name" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CapacitySchedulerPolicy.value">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">value</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.value" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">BuildConfig</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">max_input_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_seq_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">opt_batch_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">8</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_batch_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">2048</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_beam_width:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_num_tokens:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">8192</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">opt_num_tokens:</span> <span class="pre">Optional[int]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_prompt_embedding_table_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">kv_cache_type:</span> <span class="pre">tensorrt_llm.bindings.KVCacheType</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gather_context_logits:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gather_generation_logits:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">strongly_typed:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">force_num_profiles:</span> <span class="pre">Optional[int]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">profiling_verbosity:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'layer_names_only'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">enable_debug_output:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_draft_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">speculative_decoding_mode:</span> <span class="pre">tensorrt_llm.models.modeling_utils.SpeculativeDecodingMode</span> <span class="pre">=</span> <span class="pre">&lt;SpeculativeDecodingMode.NONE:</span> <span class="pre">1&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_refit:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">input_timing_cache:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">output_timing_cache:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'model.cache'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_config:</span> <span class="pre">tensorrt_llm.lora_manager.LoraConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">auto_parallel_config:</span> <span class="pre">tensorrt_llm.auto_parallel.config.AutoParallelConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight_sparsity:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight_streaming:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">plugin_config:</span> <span class="pre">tensorrt_llm.plugin.plugin.PluginConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_strip_plan:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_encoder_input_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dry_run:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">visualize_network:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">monitor_memory:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_mrope:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">max_input_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_seq_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">opt_batch_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">8</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_batch_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">2048</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_beam_width:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_num_tokens:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">8192</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">opt_num_tokens:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_prompt_embedding_table_size:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">kv_cache_type:</span> <span class="pre">~tensorrt_llm.bindings.KVCacheType</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gather_context_logits:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">gather_generation_logits:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">strongly_typed:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">True</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">force_num_profiles:</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">profiling_verbosity:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'layer_names_only'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">enable_debug_output:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_draft_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">0</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">speculative_decoding_mode:</span> <span class="pre">~tensorrt_llm.models.modeling_utils.SpeculativeDecodingMode</span> <span class="pre">=</span> <span class="pre">&lt;SpeculativeDecodingMode.NONE:</span> <span class="pre">1&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_refit:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">input_timing_cache:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">output_timing_cache:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">'model.cache'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">lora_config:</span> <span class="pre">~tensorrt_llm.lora_manager.LoraConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">auto_parallel_config:</span> <span class="pre">~tensorrt_llm.auto_parallel.config.AutoParallelConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight_sparsity:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">weight_streaming:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">plugin_config:</span> <span class="pre">~tensorrt_llm.plugin.plugin.PluginConfig</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_strip_plan:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_encoder_input_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">1024</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">dry_run:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">visualize_network:</span> <span class="pre">str</span> <span class="pre">=</span> <span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">monitor_memory:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_mrope:</span> <span class="pre">bool</span> <span class="pre">=</span> <span class="pre">False</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.auto_parallel_config">
<span class="sig-name descname"><span class="pre">auto_parallel_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">AutoParallelConfig</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.auto_parallel_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.dry_run">
<span class="sig-name descname"><span class="pre">dry_run</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.dry_run" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.enable_debug_output">
<span class="sig-name descname"><span class="pre">enable_debug_output</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.enable_debug_output" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.force_num_profiles">
<span class="sig-name descname"><span class="pre">force_num_profiles</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.force_num_profiles" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.from_dict">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">plugin_config</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.from_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.from_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.from_json_file">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_json_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">plugin_config</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.from_json_file"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.from_json_file" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.gather_context_logits">
<span class="sig-name descname"><span class="pre">gather_context_logits</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.gather_context_logits" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.gather_generation_logits">
<span class="sig-name descname"><span class="pre">gather_generation_logits</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.gather_generation_logits" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.input_timing_cache">
<span class="sig-name descname"><span class="pre">input_timing_cache</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.input_timing_cache" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.kv_cache_type">
<span class="sig-name descname"><span class="pre">kv_cache_type</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">KVCacheType</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.kv_cache_type" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.lora_config">
<span class="sig-name descname"><span class="pre">lora_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">LoraConfig</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.lora_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_batch_size">
<span class="sig-name descname"><span class="pre">max_batch_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">2048</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_batch_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_beam_width">
<span class="sig-name descname"><span class="pre">max_beam_width</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_beam_width" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_draft_len">
<span class="sig-name descname"><span class="pre">max_draft_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_draft_len" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len">
<span class="sig-name descname"><span class="pre">max_encoder_input_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1024</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_input_len">
<span class="sig-name descname"><span class="pre">max_input_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1024</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_input_len" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_num_tokens">
<span class="sig-name descname"><span class="pre">max_num_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">8192</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_num_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size">
<span class="sig-name descname"><span class="pre">max_prompt_embedding_table_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.max_seq_len">
<span class="sig-name descname"><span class="pre">max_seq_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.max_seq_len" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.monitor_memory">
<span class="sig-name descname"><span class="pre">monitor_memory</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.monitor_memory" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.opt_batch_size">
<span class="sig-name descname"><span class="pre">opt_batch_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">8</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.opt_batch_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.opt_num_tokens">
<span class="sig-name descname"><span class="pre">opt_num_tokens</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.opt_num_tokens" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.output_timing_cache">
<span class="sig-name descname"><span class="pre">output_timing_cache</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'model.cache'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.output_timing_cache" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.plugin_config">
<span class="sig-name descname"><span class="pre">plugin_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="../python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig" title="tensorrt_llm.plugin.plugin.PluginConfig"><span class="pre">PluginConfig</span></a></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.plugin_config" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.profiling_verbosity">
<span class="sig-name descname"><span class="pre">profiling_verbosity</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'layer_names_only'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.profiling_verbosity" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode">
<span class="sig-name descname"><span class="pre">speculative_decoding_mode</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="../python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode" title="tensorrt_llm.models.modeling_utils.SpeculativeDecodingMode"><span class="pre">SpeculativeDecodingMode</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">1</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.strongly_typed">
<span class="sig-name descname"><span class="pre">strongly_typed</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">True</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.strongly_typed" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.to_dict">
<span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.to_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.update">
<span class="sig-name descname"><span class="pre">update</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.update"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.update" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.update_from_dict">
<span class="sig-name descname"><span class="pre">update_from_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.update_from_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.update_from_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.update_kv_cache_type">
<span class="sig-name descname"><span class="pre">update_kv_cache_type</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model_architecture</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/builder.html#BuildConfig.update_kv_cache_type"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.update_kv_cache_type" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.use_mrope">
<span class="sig-name descname"><span class="pre">use_mrope</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.use_mrope" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.use_refit">
<span class="sig-name descname"><span class="pre">use_refit</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.use_refit" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.use_strip_plan">
<span class="sig-name descname"><span class="pre">use_strip_plan</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.use_strip_plan" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.visualize_network">
<span class="sig-name descname"><span class="pre">visualize_network</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.visualize_network" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.weight_sparsity">
<span class="sig-name descname"><span class="pre">weight_sparsity</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.weight_sparsity" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildConfig.weight_streaming">
<span class="sig-name descname"><span class="pre">weight_streaming</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.BuildConfig.weight_streaming" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">QuantConfig</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">quant_algo</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">kv_cache_quant_algo</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">128</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">smoothquant_val</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.5</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">clamp_val</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">float</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_meta_recipe</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">has_zero_point</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pre_quant_scale</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">exclude_modules</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Serializable quantization configuration class, part of the PretrainedConfig</p>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">quant_algo</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">kv_cache_quant_algo</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">128</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">smoothquant_val</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">0.5</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">clamp_val</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">float</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">use_meta_recipe</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">has_zero_point</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">pre_quant_scale</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">exclude_modules</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.clamp_val">
<span class="sig-name descname"><span class="pre">clamp_val</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">float</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.clamp_val" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.exclude_modules">
<span class="sig-name descname"><span class="pre">exclude_modules</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.exclude_modules" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.from_dict">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig.from_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.from_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.get_modelopt_kv_cache_dtype">
<span class="sig-name descname"><span class="pre">get_modelopt_kv_cache_dtype</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig.get_modelopt_kv_cache_dtype"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.get_modelopt_kv_cache_dtype" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.get_modelopt_qformat">
<span class="sig-name descname"><span class="pre">get_modelopt_qformat</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig.get_modelopt_qformat"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.get_modelopt_qformat" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.get_quant_cfg">
<span class="sig-name descname"><span class="pre">get_quant_cfg</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">module_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig.get_quant_cfg"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.get_quant_cfg" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.group_size">
<span class="sig-name descname"><span class="pre">group_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">128</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.group_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.has_zero_point">
<span class="sig-name descname"><span class="pre">has_zero_point</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.has_zero_point" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo">
<span class="sig-name descname"><span class="pre">kv_cache_quant_algo</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.layer_quant_mode">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">layer_quant_mode</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode" title="tensorrt_llm.quantization.mode.QuantMode"><span class="pre">QuantMode</span></a></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.layer_quant_mode" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.pre_quant_scale">
<span class="sig-name descname"><span class="pre">pre_quant_scale</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.pre_quant_scale" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.quant_algo">
<span class="sig-name descname"><span class="pre">quant_algo</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="../python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo" title="tensorrt_llm.quantization.mode.QuantAlgo"><span class="pre">QuantAlgo</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.quant_algo" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.quant_mode">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">quant_mode</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">QuantModeWrapper</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.quant_mode" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.requires_calibration">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">requires_calibration</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.requires_calibration" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.requires_modelopt_quantization">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">requires_modelopt_quantization</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.requires_modelopt_quantization" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.smoothquant_val">
<span class="sig-name descname"><span class="pre">smoothquant_val</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0.5</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.smoothquant_val" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.to_dict">
<span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/models/modeling_utils.html#QuantConfig.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.to_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.use_meta_recipe">
<span class="sig-name descname"><span class="pre">use_meta_recipe</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">False</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.use_meta_recipe" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantConfig.use_plugin_sq">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">use_plugin_sq</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.QuantConfig.use_plugin_sq" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">QuantAlgo</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">names=&lt;not</span> <span class="pre">given&gt;</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/quantization/mode.html#QuantAlgo"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">StrEnum</span></code></p>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.FP8">
<span class="sig-name descname"><span class="pre">FP8</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'FP8'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.FP8" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN">
<span class="sig-name descname"><span class="pre">FP8_PER_CHANNEL_PER_TOKEN</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'FP8_PER_CHANNEL_PER_TOKEN'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.INT8">
<span class="sig-name descname"><span class="pre">INT8</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'INT8'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.INT8" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION">
<span class="sig-name descname"><span class="pre">MIXED_PRECISION</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'MIXED_PRECISION'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.NO_QUANT">
<span class="sig-name descname"><span class="pre">NO_QUANT</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'NO_QUANT'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.NO_QUANT" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.NVFP4">
<span class="sig-name descname"><span class="pre">NVFP4</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'NVFP4'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.NVFP4" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A16">
<span class="sig-name descname"><span class="pre">W4A16</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A16'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A16_AWQ">
<span class="sig-name descname"><span class="pre">W4A16_AWQ</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A16_AWQ'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16_AWQ" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A16_GPTQ">
<span class="sig-name descname"><span class="pre">W4A16_GPTQ</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A16_GPTQ'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16_GPTQ" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A8_AWQ">
<span class="sig-name descname"><span class="pre">W4A8_AWQ</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A8_AWQ'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_AWQ" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_CHANNEL">
<span class="sig-name descname"><span class="pre">W4A8_QSERVE_PER_CHANNEL</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A8_QSERVE_PER_CHANNEL'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_CHANNEL" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_GROUP">
<span class="sig-name descname"><span class="pre">W4A8_QSERVE_PER_GROUP</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W4A8_QSERVE_PER_GROUP'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_GROUP" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A16">
<span class="sig-name descname"><span class="pre">W8A16</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A16'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A16" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A16_GPTQ">
<span class="sig-name descname"><span class="pre">W8A16_GPTQ</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A16_GPTQ'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A16_GPTQ" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL">
<span class="sig-name descname"><span class="pre">W8A8_SQ_PER_CHANNEL</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A8_SQ_PER_CHANNEL'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN">
<span class="sig-name descname"><span class="pre">W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN">
<span class="sig-name descname"><span class="pre">W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN">
<span class="sig-name descname"><span class="pre">W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PLUGIN">
<span class="sig-name descname"><span class="pre">W8A8_SQ_PER_TENSOR_PLUGIN</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'W8A8_SQ_PER_TENSOR_PLUGIN'</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PLUGIN" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">CalibConfig</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'cuda'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'cpu'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'cuda'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'cnn_dailymail'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_batches</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">512</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_max_seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">512</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1234</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_max_seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2048</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm_utils.html#CalibConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Calibration configuration.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>device</strong> (<em>Literal</em><em>[</em><em>'cuda'</em><em>, </em><em>'cpu'</em><em>]</em><em>, </em><em>default='cuda'</em>) The device to run calibration.</p></li>
<li><p><strong>calib_dataset</strong> (<em>str</em><em>, </em><em>default='cnn_dailymail'</em>) The name or local path of calibration dataset.</p></li>
<li><p><strong>calib_batches</strong> (<em>int</em><em>, </em><em>default=512</em>) The number of batches that the calibration runs.</p></li>
<li><p><strong>calib_batch_size</strong> (<em>int</em><em>, </em><em>default=1</em>) The batch size that the calibration runs.</p></li>
<li><p><strong>calib_max_seq_length</strong> (<em>int</em><em>, </em><em>default=512</em>) The maximum sequence length that the calibration runs.</p></li>
<li><p><strong>random_seed</strong> (<em>int</em><em>, </em><em>default=1234</em>) The random seed used for calibration.</p></li>
<li><p><strong>tokenizer_max_seq_length</strong> (<em>int</em><em>, </em><em>default=2048</em>) The maximum sequence length to initialize tokenizer for calibration.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'cuda'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'cpu'</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'cuda'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_dataset</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'cnn_dailymail'</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_batches</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">512</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_batch_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">calib_max_seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">512</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1234</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">tokenizer_max_seq_length</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2048</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.calib_batch_size">
<span class="sig-name descname"><span class="pre">calib_batch_size</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.calib_batch_size" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.calib_batches">
<span class="sig-name descname"><span class="pre">calib_batches</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.calib_batches" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.calib_dataset">
<span class="sig-name descname"><span class="pre">calib_dataset</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.calib_dataset" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length">
<span class="sig-name descname"><span class="pre">calib_max_seq_length</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.device">
<span class="sig-name descname"><span class="pre">device</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Literal</span><span class="p"><span class="pre">[</span></span><span class="s"><span class="pre">'cuda'</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="s"><span class="pre">'cpu'</span></span><span class="p"><span class="pre">]</span></span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.device" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.from_dict">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">from_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm_utils.html#CalibConfig.from_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.from_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.random_seed">
<span class="sig-name descname"><span class="pre">random_seed</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.random_seed" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.to_dict">
<span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/llm_utils.html#CalibConfig.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.to_dict" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.CalibConfig.tokenizer_max_seq_length">
<span class="sig-name descname"><span class="pre">tokenizer_max_seq_length</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#tensorrt_llm.llmapi.CalibConfig.tokenizer_max_seq_length" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildCacheConfig">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">BuildCacheConfig</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">cache_root</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Path</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_records</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">10</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_cache_storage_gb</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">256</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/build_cache.html#BuildCacheConfig"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildCacheConfig" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Configuration for the build cache.</p>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildCacheConfig.cache_root">
<span class="sig-name descname"><span class="pre">cache_root</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.BuildCacheConfig.cache_root" title="Link to this definition">#</a></dt>
<dd><p>The root directory for the build cache.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>str</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildCacheConfig.max_records">
<span class="sig-name descname"><span class="pre">max_records</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.BuildCacheConfig.max_records" title="Link to this definition">#</a></dt>
<dd><p>The maximum number of records to store in the cache.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>int</p>
</dd>
</dl>
</dd></dl>
<dl class="py attribute">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb">
<span class="sig-name descname"><span class="pre">max_cache_storage_gb</span></span><a class="headerlink" href="#tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb" title="Link to this definition">#</a></dt>
<dd><p>The maximum amount of storage (in GB) to use for the cache.</p>
<dl class="field-list simple">
<dt class="field-odd">Type<span class="colon">:</span></dt>
<dd class="field-odd"><p>float</p>
</dd>
</dl>
</dd></dl>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The build-cache assumes the weights of the model are not changed during the execution. If the weights are
changed, you should remove the caches manually.</p>
</div>
<dl class="py method">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.BuildCacheConfig.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span>
<dl>
<dd><em class="sig-param"><span class="n"><span class="pre">cache_root</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Path</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_records</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">10</span></span></em>,</dd>
<dd><em class="sig-param"><span class="n"><span class="pre">max_cache_storage_gb</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">float</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">256</span></span></em>,</dd>
</dl>
<span class="sig-paren">)</span><a class="reference internal" href="../_modules/tensorrt_llm/llmapi/build_cache.html#BuildCacheConfig.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.BuildCacheConfig.__init__" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="id0">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">cache_root</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Path</span></em><a class="headerlink" href="#id0" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="id1">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_cache_storage_gb</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">float</span></em><a class="headerlink" href="#id1" title="Link to this definition">#</a></dt>
<dd></dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="id2">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">max_records</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><a class="headerlink" href="#id2" title="Link to this definition">#</a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.RequestError">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">RequestError</span></span><a class="reference internal" href="../_modules/tensorrt_llm/executor.html#RequestError"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.RequestError" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">RuntimeError</span></code></p>
<p>The error raised when the request is failed.</p>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="tensorrt_llm.llmapi.NoStatsAvailable">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">tensorrt_llm.llmapi.</span></span><span class="sig-name descname"><span class="pre">NoStatsAvailable</span></span><a class="reference internal" href="../_modules/tensorrt_llm/executor.html#NoStatsAvailable"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#tensorrt_llm.llmapi.NoStatsAvailable" title="Link to this definition">#</a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">Exception</span></code></p>
</dd></dl>
</section>
</article>
<footer class="prev-next-footer d-print-none">
<div class="prev-next-area">
<a class="left-prev"
href="index.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">API Introduction</p>
</div>
</a>
<a class="right-next"
href="../llm-api-examples/index.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">LLM Examples Introduction</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<dialog id="pst-secondary-sidebar-modal"></dialog>
<div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM"><code class="docutils literal notranslate"><span class="pre">LLM</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.generate"><code class="docutils literal notranslate"><span class="pre">generate()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.generate_async"><code class="docutils literal notranslate"><span class="pre">generate_async()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.save"><code class="docutils literal notranslate"><span class="pre">save()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.shutdown"><code class="docutils literal notranslate"><span class="pre">shutdown()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.tokenizer"><code class="docutils literal notranslate"><span class="pre">tokenizer</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LLM.workspace"><code class="docutils literal notranslate"><span class="pre">workspace</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.RequestOutput"><code class="docutils literal notranslate"><span class="pre">RequestOutput</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.RequestOutput.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.RequestOutput.handle_response"><code class="docutils literal notranslate"><span class="pre">handle_response()</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams"><code class="docutils literal notranslate"><span class="pre">GuidedDecodingParams</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.grammar"><code class="docutils literal notranslate"><span class="pre">grammar</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.json"><code class="docutils literal notranslate"><span class="pre">json</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.json_object"><code class="docutils literal notranslate"><span class="pre">json_object</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.num_guides"><code class="docutils literal notranslate"><span class="pre">num_guides</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.GuidedDecodingParams.regex"><code class="docutils literal notranslate"><span class="pre">regex</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams"><code class="docutils literal notranslate"><span class="pre">SamplingParams</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.add_special_tokens"><code class="docutils literal notranslate"><span class="pre">add_special_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.additional_model_outputs"><code class="docutils literal notranslate"><span class="pre">additional_model_outputs</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.bad"><code class="docutils literal notranslate"><span class="pre">bad</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.bad_token_ids"><code class="docutils literal notranslate"><span class="pre">bad_token_ids</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate"><code class="docutils literal notranslate"><span class="pre">beam_search_diversity_rate</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.beam_width"><code class="docutils literal notranslate"><span class="pre">beam_width</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.best_of"><code class="docutils literal notranslate"><span class="pre">best_of</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.detokenize"><code class="docutils literal notranslate"><span class="pre">detokenize</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.early_stopping"><code class="docutils literal notranslate"><span class="pre">early_stopping</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.embedding_bias"><code class="docutils literal notranslate"><span class="pre">embedding_bias</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.end_id"><code class="docutils literal notranslate"><span class="pre">end_id</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output"><code class="docutils literal notranslate"><span class="pre">exclude_input_from_output</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.external_draft_tokens_config"><code class="docutils literal notranslate"><span class="pre">external_draft_tokens_config</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.frequency_penalty"><code class="docutils literal notranslate"><span class="pre">frequency_penalty</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.greedy_decoding"><code class="docutils literal notranslate"><span class="pre">greedy_decoding</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.guided_decoding"><code class="docutils literal notranslate"><span class="pre">guided_decoding</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.ignore_eos"><code class="docutils literal notranslate"><span class="pre">ignore_eos</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output"><code class="docutils literal notranslate"><span class="pre">include_stop_str_in_output</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.length_penalty"><code class="docutils literal notranslate"><span class="pre">length_penalty</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.logits_post_processor_name"><code class="docutils literal notranslate"><span class="pre">logits_post_processor_name</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.lookahead_config"><code class="docutils literal notranslate"><span class="pre">lookahead_config</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.max_new_tokens"><code class="docutils literal notranslate"><span class="pre">max_new_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.max_tokens"><code class="docutils literal notranslate"><span class="pre">max_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.min_length"><code class="docutils literal notranslate"><span class="pre">min_length</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.min_tokens"><code class="docutils literal notranslate"><span class="pre">min_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.n"><code class="docutils literal notranslate"><span class="pre">n</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size"><code class="docutils literal notranslate"><span class="pre">no_repeat_ngram_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.num_return_sequences"><code class="docutils literal notranslate"><span class="pre">num_return_sequences</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.pad_id"><code class="docutils literal notranslate"><span class="pre">pad_id</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.presence_penalty"><code class="docutils literal notranslate"><span class="pre">presence_penalty</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.random_seed"><code class="docutils literal notranslate"><span class="pre">random_seed</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.repetition_penalty"><code class="docutils literal notranslate"><span class="pre">repetition_penalty</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.return_context_logits"><code class="docutils literal notranslate"><span class="pre">return_context_logits</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.return_encoder_output"><code class="docutils literal notranslate"><span class="pre">return_encoder_output</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.return_generation_logits"><code class="docutils literal notranslate"><span class="pre">return_generation_logits</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.return_log_probs"><code class="docutils literal notranslate"><span class="pre">return_log_probs</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.return_perf_metrics"><code class="docutils literal notranslate"><span class="pre">return_perf_metrics</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.seed"><code class="docutils literal notranslate"><span class="pre">seed</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.setup"><code class="docutils literal notranslate"><span class="pre">setup()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.skip_special_tokens"><code class="docutils literal notranslate"><span class="pre">skip_special_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens"><code class="docutils literal notranslate"><span class="pre">spaces_between_special_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.stop"><code class="docutils literal notranslate"><span class="pre">stop</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.stop_token_ids"><code class="docutils literal notranslate"><span class="pre">stop_token_ids</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.temperature"><code class="docutils literal notranslate"><span class="pre">temperature</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.top_k"><code class="docutils literal notranslate"><span class="pre">top_k</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.top_p"><code class="docutils literal notranslate"><span class="pre">top_p</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.top_p_decay"><code class="docutils literal notranslate"><span class="pre">top_p_decay</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.top_p_min"><code class="docutils literal notranslate"><span class="pre">top_p_min</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.top_p_reset_ids"><code class="docutils literal notranslate"><span class="pre">top_p_reset_ids</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.truncate_prompt_tokens"><code class="docutils literal notranslate"><span class="pre">truncate_prompt_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SamplingParams.use_beam_search"><code class="docutils literal notranslate"><span class="pre">use_beam_search</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig"><code class="docutils literal notranslate"><span class="pre">KvCacheConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction"><code class="docutils literal notranslate"><span class="pre">cross_kv_cache_fraction</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse"><code class="docutils literal notranslate"><span class="pre">enable_block_reuse</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size"><code class="docutils literal notranslate"><span class="pre">event_buffer_max_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.fill_empty_fields_from_runtime_defaults"><code class="docutils literal notranslate"><span class="pre">fill_empty_fields_from_runtime_defaults()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction"><code class="docutils literal notranslate"><span class="pre">free_gpu_memory_fraction</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.host_cache_size"><code class="docutils literal notranslate"><span class="pre">host_cache_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.max_attention_window"><code class="docutils literal notranslate"><span class="pre">max_attention_window</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.max_tokens"><code class="docutils literal notranslate"><span class="pre">max_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks"><code class="docutils literal notranslate"><span class="pre">onboard_blocks</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority"><code class="docutils literal notranslate"><span class="pre">secondary_offload_min_priority</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.KvCacheConfig.sink_token_length"><code class="docutils literal notranslate"><span class="pre">sink_token_length</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig"><code class="docutils literal notranslate"><span class="pre">LookaheadDecodingConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource"><code class="docutils literal notranslate"><span class="pre">calculate_speculative_resource()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size"><code class="docutils literal notranslate"><span class="pre">max_ngram_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size"><code class="docutils literal notranslate"><span class="pre">max_verification_set_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size"><code class="docutils literal notranslate"><span class="pre">max_window_size</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.MedusaDecodingConfig"><code class="docutils literal notranslate"><span class="pre">MedusaDecodingConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices"><code class="docutils literal notranslate"><span class="pre">medusa_choices</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads"><code class="docutils literal notranslate"><span class="pre">num_medusa_heads</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SchedulerConfig"><code class="docutils literal notranslate"><span class="pre">SchedulerConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SchedulerConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy"><code class="docutils literal notranslate"><span class="pre">capacity_scheduler_policy</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy"><code class="docutils literal notranslate"><span class="pre">context_chunking_policy</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config"><code class="docutils literal notranslate"><span class="pre">dynamic_batch_config</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy"><code class="docutils literal notranslate"><span class="pre">CapacitySchedulerPolicy</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT"><code class="docutils literal notranslate"><span class="pre">GUARANTEED_NO_EVICT</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION"><code class="docutils literal notranslate"><span class="pre">MAX_UTILIZATION</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH"><code class="docutils literal notranslate"><span class="pre">STATIC_BATCH</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.name"><code class="docutils literal notranslate"><span class="pre">name</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CapacitySchedulerPolicy.value"><code class="docutils literal notranslate"><span class="pre">value</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig"><code class="docutils literal notranslate"><span class="pre">BuildConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.auto_parallel_config"><code class="docutils literal notranslate"><span class="pre">auto_parallel_config</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.dry_run"><code class="docutils literal notranslate"><span class="pre">dry_run</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.enable_debug_output"><code class="docutils literal notranslate"><span class="pre">enable_debug_output</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.force_num_profiles"><code class="docutils literal notranslate"><span class="pre">force_num_profiles</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.from_dict"><code class="docutils literal notranslate"><span class="pre">from_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.from_json_file"><code class="docutils literal notranslate"><span class="pre">from_json_file()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.gather_context_logits"><code class="docutils literal notranslate"><span class="pre">gather_context_logits</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.gather_generation_logits"><code class="docutils literal notranslate"><span class="pre">gather_generation_logits</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.input_timing_cache"><code class="docutils literal notranslate"><span class="pre">input_timing_cache</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.kv_cache_type"><code class="docutils literal notranslate"><span class="pre">kv_cache_type</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.lora_config"><code class="docutils literal notranslate"><span class="pre">lora_config</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_batch_size"><code class="docutils literal notranslate"><span class="pre">max_batch_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_beam_width"><code class="docutils literal notranslate"><span class="pre">max_beam_width</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_draft_len"><code class="docutils literal notranslate"><span class="pre">max_draft_len</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len"><code class="docutils literal notranslate"><span class="pre">max_encoder_input_len</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_input_len"><code class="docutils literal notranslate"><span class="pre">max_input_len</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_num_tokens"><code class="docutils literal notranslate"><span class="pre">max_num_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size"><code class="docutils literal notranslate"><span class="pre">max_prompt_embedding_table_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.max_seq_len"><code class="docutils literal notranslate"><span class="pre">max_seq_len</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.monitor_memory"><code class="docutils literal notranslate"><span class="pre">monitor_memory</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.opt_batch_size"><code class="docutils literal notranslate"><span class="pre">opt_batch_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.opt_num_tokens"><code class="docutils literal notranslate"><span class="pre">opt_num_tokens</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.output_timing_cache"><code class="docutils literal notranslate"><span class="pre">output_timing_cache</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.plugin_config"><code class="docutils literal notranslate"><span class="pre">plugin_config</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.profiling_verbosity"><code class="docutils literal notranslate"><span class="pre">profiling_verbosity</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode"><code class="docutils literal notranslate"><span class="pre">speculative_decoding_mode</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.strongly_typed"><code class="docutils literal notranslate"><span class="pre">strongly_typed</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.to_dict"><code class="docutils literal notranslate"><span class="pre">to_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.update"><code class="docutils literal notranslate"><span class="pre">update()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.update_from_dict"><code class="docutils literal notranslate"><span class="pre">update_from_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.update_kv_cache_type"><code class="docutils literal notranslate"><span class="pre">update_kv_cache_type()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.use_mrope"><code class="docutils literal notranslate"><span class="pre">use_mrope</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.use_refit"><code class="docutils literal notranslate"><span class="pre">use_refit</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.use_strip_plan"><code class="docutils literal notranslate"><span class="pre">use_strip_plan</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.visualize_network"><code class="docutils literal notranslate"><span class="pre">visualize_network</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.weight_sparsity"><code class="docutils literal notranslate"><span class="pre">weight_sparsity</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildConfig.weight_streaming"><code class="docutils literal notranslate"><span class="pre">weight_streaming</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig"><code class="docutils literal notranslate"><span class="pre">QuantConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.clamp_val"><code class="docutils literal notranslate"><span class="pre">clamp_val</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.exclude_modules"><code class="docutils literal notranslate"><span class="pre">exclude_modules</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.from_dict"><code class="docutils literal notranslate"><span class="pre">from_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.get_modelopt_kv_cache_dtype"><code class="docutils literal notranslate"><span class="pre">get_modelopt_kv_cache_dtype()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.get_modelopt_qformat"><code class="docutils literal notranslate"><span class="pre">get_modelopt_qformat()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.get_quant_cfg"><code class="docutils literal notranslate"><span class="pre">get_quant_cfg()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.group_size"><code class="docutils literal notranslate"><span class="pre">group_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.has_zero_point"><code class="docutils literal notranslate"><span class="pre">has_zero_point</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo"><code class="docutils literal notranslate"><span class="pre">kv_cache_quant_algo</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.layer_quant_mode"><code class="docutils literal notranslate"><span class="pre">layer_quant_mode</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.pre_quant_scale"><code class="docutils literal notranslate"><span class="pre">pre_quant_scale</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.quant_algo"><code class="docutils literal notranslate"><span class="pre">quant_algo</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.quant_mode"><code class="docutils literal notranslate"><span class="pre">quant_mode</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.requires_calibration"><code class="docutils literal notranslate"><span class="pre">requires_calibration</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.requires_modelopt_quantization"><code class="docutils literal notranslate"><span class="pre">requires_modelopt_quantization</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.smoothquant_val"><code class="docutils literal notranslate"><span class="pre">smoothquant_val</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.to_dict"><code class="docutils literal notranslate"><span class="pre">to_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.use_meta_recipe"><code class="docutils literal notranslate"><span class="pre">use_meta_recipe</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantConfig.use_plugin_sq"><code class="docutils literal notranslate"><span class="pre">use_plugin_sq</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo"><code class="docutils literal notranslate"><span class="pre">QuantAlgo</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.FP8"><code class="docutils literal notranslate"><span class="pre">FP8</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN"><code class="docutils literal notranslate"><span class="pre">FP8_PER_CHANNEL_PER_TOKEN</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.INT8"><code class="docutils literal notranslate"><span class="pre">INT8</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION"><code class="docutils literal notranslate"><span class="pre">MIXED_PRECISION</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.NO_QUANT"><code class="docutils literal notranslate"><span class="pre">NO_QUANT</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.NVFP4"><code class="docutils literal notranslate"><span class="pre">NVFP4</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16"><code class="docutils literal notranslate"><span class="pre">W4A16</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16_AWQ"><code class="docutils literal notranslate"><span class="pre">W4A16_AWQ</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A16_GPTQ"><code class="docutils literal notranslate"><span class="pre">W4A16_GPTQ</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_AWQ"><code class="docutils literal notranslate"><span class="pre">W4A8_AWQ</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_CHANNEL"><code class="docutils literal notranslate"><span class="pre">W4A8_QSERVE_PER_CHANNEL</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_GROUP"><code class="docutils literal notranslate"><span class="pre">W4A8_QSERVE_PER_GROUP</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A16"><code class="docutils literal notranslate"><span class="pre">W8A16</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A16_GPTQ"><code class="docutils literal notranslate"><span class="pre">W8A16_GPTQ</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL"><code class="docutils literal notranslate"><span class="pre">W8A8_SQ_PER_CHANNEL</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN"><code class="docutils literal notranslate"><span class="pre">W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN"><code class="docutils literal notranslate"><span class="pre">W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN"><code class="docutils literal notranslate"><span class="pre">W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PLUGIN"><code class="docutils literal notranslate"><span class="pre">W8A8_SQ_PER_TENSOR_PLUGIN</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig"><code class="docutils literal notranslate"><span class="pre">CalibConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.calib_batch_size"><code class="docutils literal notranslate"><span class="pre">calib_batch_size</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.calib_batches"><code class="docutils literal notranslate"><span class="pre">calib_batches</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.calib_dataset"><code class="docutils literal notranslate"><span class="pre">calib_dataset</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length"><code class="docutils literal notranslate"><span class="pre">calib_max_seq_length</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.device"><code class="docutils literal notranslate"><span class="pre">device</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.from_dict"><code class="docutils literal notranslate"><span class="pre">from_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.random_seed"><code class="docutils literal notranslate"><span class="pre">random_seed</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.to_dict"><code class="docutils literal notranslate"><span class="pre">to_dict()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.CalibConfig.tokenizer_max_seq_length"><code class="docutils literal notranslate"><span class="pre">tokenizer_max_seq_length</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildCacheConfig"><code class="docutils literal notranslate"><span class="pre">BuildCacheConfig</span></code></a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildCacheConfig.cache_root"><code class="docutils literal notranslate"><span class="pre">cache_root</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildCacheConfig.max_records"><code class="docutils literal notranslate"><span class="pre">max_records</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb"><code class="docutils literal notranslate"><span class="pre">max_cache_storage_gb</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.BuildCacheConfig.__init__"><code class="docutils literal notranslate"><span class="pre">__init__()</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id0"><code class="docutils literal notranslate"><span class="pre">cache_root</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id1"><code class="docutils literal notranslate"><span class="pre">max_cache_storage_gb</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#id2"><code class="docutils literal notranslate"><span class="pre">max_records</span></code></a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.RequestError"><code class="docutils literal notranslate"><span class="pre">RequestError</span></code></a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#tensorrt_llm.llmapi.NoStatsAvailable"><code class="docutils literal notranslate"><span class="pre">NoStatsAvailable</span></code></a></li>
</ul>
</nav></div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<a class="footer-brand logo" href="https://www.nvidia.com">
<img src="../_static/nvidia-logo-horiz-rgb-1c-blk-for-screen.svg" class="logo__image only-light" alt="NVIDIA"/>
<img src="../_static/nvidia-logo-horiz-rgb-1c-wht-for-screen.svg" class="logo__image only-dark" alt="NVIDIA"/>
</a></div>
<div class="footer-item">
<div class="footer-links">
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/">Privacy Policy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/">Terms of Service</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/">Accessibility</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/">Corporate Policies</a>
|
<a class="external" href="https://www.nvidia.com/en-us/product-security/">Product Security</a>
|
<a class="external" href="https://www.nvidia.com/en-us/contact/">Contact</a>
</div>
</div>
<div class="footer-item">
<p class="copyright">
Copyright © 2024, NVidia.
<br/>
</p>
</div>
</div>
</div>
</footer>
</body>
</html>