<h1 id="index">Index</h1>
|
||
|
||
<div class="genindex-jumpbox">
|
||
<a href="#Symbols"><strong>Symbols</strong></a>
|
||
| <a href="#_"><strong>_</strong></a>
|
||
| <a href="#A"><strong>A</strong></a>
|
||
| <a href="#B"><strong>B</strong></a>
|
||
| <a href="#C"><strong>C</strong></a>
|
||
| <a href="#D"><strong>D</strong></a>
|
||
| <a href="#E"><strong>E</strong></a>
|
||
| <a href="#F"><strong>F</strong></a>
|
||
| <a href="#G"><strong>G</strong></a>
|
||
| <a href="#H"><strong>H</strong></a>
|
||
| <a href="#I"><strong>I</strong></a>
|
||
| <a href="#J"><strong>J</strong></a>
|
||
| <a href="#K"><strong>K</strong></a>
|
||
| <a href="#L"><strong>L</strong></a>
|
||
| <a href="#M"><strong>M</strong></a>
|
||
| <a href="#N"><strong>N</strong></a>
|
||
| <a href="#O"><strong>O</strong></a>
|
||
| <a href="#P"><strong>P</strong></a>
|
||
| <a href="#Q"><strong>Q</strong></a>
|
||
| <a href="#R"><strong>R</strong></a>
|
||
| <a href="#S"><strong>S</strong></a>
|
||
| <a href="#T"><strong>T</strong></a>
|
||
| <a href="#U"><strong>U</strong></a>
|
||
| <a href="#V"><strong>V</strong></a>
|
||
| <a href="#W"><strong>W</strong></a>
|
||
| <a href="#Y"><strong>Y</strong></a>
|
||
| <a href="#Z"><strong>Z</strong></a>
|
||
|
||
</div>
|
||
<h2 id="Symbols">Symbols</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li>
|
||
--accuracy_threshold
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-accuracy_threshold">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--apply_chat_template
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-apply_chat_template">trtllm-eval-cnn_dailymail command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-apply_chat_template">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-apply_chat_template">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-apply_chat_template">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-apply_chat_template">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-apply_chat_template">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-apply_chat_template">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--backend
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-backend">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-backend">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-backend">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-backend">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--beam_width
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-beam_width">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-beam_width">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--chat_template_kwargs
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-chat_template_kwargs">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-chat_template_kwargs">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-chat_template_kwargs">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-chat_template_kwargs">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-chat_template_kwargs">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-chat_template_kwargs">trtllm-eval-mmmu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--check_accuracy
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-check_accuracy">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--cluster_size
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-cluster_size">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-cluster_size">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--concurrency
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-concurrency">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-concurrency">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--config_file
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-c">trtllm-serve-disaggregated command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-c">trtllm-serve-disaggregated_mpi_worker command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--cot
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-cot">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--custom_module_dirs
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-custom_module_dirs">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--data_device
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-data_device">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--dataset
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-dataset">trtllm-bench-build command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-dataset">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-dataset">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--dataset_path
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-dataset_path">trtllm-eval-cnn_dailymail command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-dataset_path">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-dataset_path">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-dataset_path">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-dataset_path">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-dataset_path">trtllm-eval-json_mode_eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-dataset_path">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-dataset_path">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-dataset_path">trtllm-eval-mmmu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--difficulty
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-difficulty">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--disable_chunked_context
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-enable_chunked_context">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--disable_kv_cache_reuse
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-disable_kv_cache_reuse">trtllm-eval command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--disagg_cluster_uri
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-disagg_cluster_uri">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--domain
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-domain">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--enable_chunked_context
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-enable_chunked_context">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--enable_chunked_prefill
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-enable_chunked_prefill">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--engine_dir
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-engine_dir">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-engine_dir">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--eos_id
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-eos_id">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--ep
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-ep">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-ep">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--ep_size
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-ep_size">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-ep_size">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--extra_encoder_options
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-extra_encoder_options">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--extra_llm_api_options
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-extra_llm_api_options">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-extra_llm_api_options">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-extra_llm_api_options">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-extra_llm_api_options">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--fail_fast_on_attention_window_too_large
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-fail_fast_on_attention_window_too_large">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--fewshot_as_multiturn
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-fewshot_as_multiturn">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--gpus_per_node
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpus_per_node">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-gpus_per_node">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-gpus_per_node">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--host
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-host">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-host">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--image_data_format
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-image_data_format">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--iteration_log
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-iteration_log">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-iteration_log">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--kv_cache_free_gpu_mem_fraction
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-kv_cache_free_gpu_mem_fraction">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-kv_cache_free_gpu_mem_fraction">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--kv_cache_free_gpu_memory_fraction
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-kv_cache_free_gpu_memory_fraction">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-kv_cache_free_gpu_memory_fraction">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--length
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-length">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--log_level
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-log_level">trtllm-bench command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-log_level">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-l">trtllm-serve-disaggregated command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-log_level">trtllm-serve-disaggregated_mpi_worker command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-log_level">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-log_level">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_batch_size
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_batch_size">trtllm-bench-build command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_batch_size">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_batch_size">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-max_batch_size">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_batch_size">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_beam_width
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_beam_width">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_beam_width">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_input_len
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-max_input_len">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_input_len">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_input_length
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-max_input_length">trtllm-eval-cnn_dailymail command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-max_input_length">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-max_input_length">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-max_input_length">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-max_input_length">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-max_input_length">trtllm-eval-json_mode_eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_input_length">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-max_input_length">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-max_input_length">trtllm-eval-mmmu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_len
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_len">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_num_tokens
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_num_tokens">trtllm-bench-build command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_num_tokens">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_num_tokens">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-max_num_tokens">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_num_tokens">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_output_length
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-max_output_length">trtllm-eval-cnn_dailymail command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-max_output_length">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-max_output_length">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-max_output_length">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-max_output_length">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-max_output_length">trtllm-eval-json_mode_eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_output_length">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-max_output_length">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-max_output_length">trtllm-eval-mmmu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--max_seq_len
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_seq_len">trtllm-bench-build command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-max_seq_len">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_seq_len">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_seq_len">trtllm-eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_seq_len">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li>
|
||
--media_io_kwargs
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-media_io_kwargs">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--medusa_choices
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-medusa_choices">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--metadata_server_config_file
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-m">trtllm-serve-disaggregated command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-metadata_server_config_file">trtllm-serve-mm_embedding_serve command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-metadata_server_config_file">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--metrics-log-interval
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-metrics-log-interval">trtllm-serve-disaggregated command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--modality
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-modality">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-modality">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--model
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-m">trtllm-bench command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-model">trtllm-eval command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--model_path
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-model_path">trtllm-bench command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--no_context
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-no_context">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--no_skip_tokenizer_init
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-no_skip_tokenizer_init">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--no_weights_loading
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-no_weights_loading">trtllm-bench-build command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--num_fewshot
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-num_fewshot">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--num_postprocess_workers
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-num_postprocess_workers">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--num_requests
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-num_requests">trtllm-bench-latency command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-num_requests">trtllm-bench-throughput command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--num_samples
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-num_samples">trtllm-eval-cnn_dailymail command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-num_samples">trtllm-eval-gpqa_diamond command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-num_samples">trtllm-eval-gpqa_extended command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-num_samples">trtllm-eval-gpqa_main command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-num_samples">trtllm-eval-gsm8k command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-num_samples">trtllm-eval-json_mode_eval command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-num_samples">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-num_samples">trtllm-eval-mmlu command line option</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-num_samples">trtllm-eval-mmmu command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--otlp_traces_endpoint
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-otlp_traces_endpoint">trtllm-serve-serve command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--output_dir
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-output_dir">trtllm-eval-longbench_v2 command line option</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
--output_json
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-output_json">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--port

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-port">trtllm-serve-mm_embedding_serve command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-port">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--pp

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-pp">trtllm-bench-latency command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-pp">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--pp_size

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-pp">trtllm-bench-build command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-pp_size">trtllm-eval command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-pp_size">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--prompts_dir

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-prompts_dir">trtllm-eval-longbench_v2 command line option</a>
</li>
</ul></li>
<li>
--quantization

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-q">trtllm-bench-build command line option</a>
</li>
</ul></li>
<li>
--rag

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-rag">trtllm-eval-longbench_v2 command line option</a>
</li>
</ul></li>
<li>
--random_seed

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-random_seed">trtllm-eval-cnn_dailymail command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-random_seed">trtllm-eval-gpqa_diamond command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-random_seed">trtllm-eval-gpqa_extended command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-random_seed">trtllm-eval-gpqa_main command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-random_seed">trtllm-eval-gsm8k command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-random_seed">trtllm-eval-json_mode_eval command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-random_seed">trtllm-eval-longbench_v2 command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-random_seed">trtllm-eval-mmlu command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-random_seed">trtllm-eval-mmmu command line option</a>
</li>
</ul></li>
<li>
--reasoning_parser

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-reasoning_parser">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--report_json

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-report_json">trtllm-bench-latency command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-report_json">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--request_json

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-request_json">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--request_timeout

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-r">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
--rouge_path

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-rouge_path">trtllm-eval-cnn_dailymail command line option</a>
</li>
</ul></li>
<li>
--sampler_options

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-sampler_options">trtllm-bench-latency command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-sampler_options">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--scheduler_policy

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-scheduler_policy">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--server_role

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-server_role">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--server_start_timeout

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-t">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
--start_idx

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-start_idx">trtllm-eval-longbench_v2 command line option</a>
</li>
</ul></li>
<li>
--streaming

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-streaming">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--system_prompt

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-system_prompt">trtllm-eval-cnn_dailymail command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-system_prompt">trtllm-eval-gpqa_diamond command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-system_prompt">trtllm-eval-gpqa_extended command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-system_prompt">trtllm-eval-gpqa_main command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-system_prompt">trtllm-eval-gsm8k command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-system_prompt">trtllm-eval-json_mode_eval command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-system_prompt">trtllm-eval-longbench_v2 command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-system_prompt">trtllm-eval-mmlu command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-system_prompt">trtllm-eval-mmmu command line option</a>
</li>
</ul></li>
<li>
--target_input_len

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-target_input_len">trtllm-bench-build command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-target_input_len">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--target_output_len

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-target_output_len">trtllm-bench-build command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-target_output_len">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--tokenizer

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-tokenizer">trtllm-eval command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tokenizer">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--tool_parser

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tool_parser">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--tp

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-tp">trtllm-bench-latency command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-tp">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--tp_size

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-tp">trtllm-bench-build command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-tp_size">trtllm-eval command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tp_size">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--trust_remote_code

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-trust_remote_code">trtllm-bench-build command line option</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-trust_remote_code">trtllm-eval command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-trust_remote_code">trtllm-serve-mm_embedding_serve command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-trust_remote_code">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li>
--warmup

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-warmup">trtllm-bench-latency command line option</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-warmup">trtllm-bench-throughput command line option</a>
</li>
</ul></li>
<li>
--workspace

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-w">trtllm-bench command line option</a>
</li>
</ul></li>
<li>
-c

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-c">trtllm-serve-disaggregated command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-c">trtllm-serve-disaggregated_mpi_worker command line option</a>
</li>
</ul></li>
<li>
-l

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-l">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
-m

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-m">trtllm-bench command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-m">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
-pp

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-pp">trtllm-bench-build command line option</a>
</li>
</ul></li>
<li>
-q

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-q">trtllm-bench-build command line option</a>
</li>
</ul></li>
<li>
-r

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-r">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
-t

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-t">trtllm-serve-disaggregated command line option</a>
</li>
</ul></li>
<li>
-tp

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-tp">trtllm-bench-build command line option</a>
</li>
</ul></li>
<li>
-w

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-w">trtllm-bench command line option</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="_">_</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.__init__">__init__ (tensorrt_llm.llmapi.KvCacheRetentionConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.__init__">(tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.__init__">__init__() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.__init__">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.__init__">(tensorrt_llm.llmapi.BatchingType method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.__init__">(tensorrt_llm.llmapi.BuildCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.__init__">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.__init__">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.__init__">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.__init__">(tensorrt_llm.llmapi.CompletionOutput method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.__init__">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.__init__">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.__init__">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.__init__">(tensorrt_llm.llmapi.DisaggregatedParams method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.__init__">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.__init__">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.__init__">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.__init__">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.__init__">(tensorrt_llm.llmapi.GuidedDecodingParams method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.__init__">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.__init__">(tensorrt_llm.llmapi.LLM method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.__init__">(tensorrt_llm.llmapi.LoRARequest method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.__init__">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.__init__">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.__init__">(tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.__init__">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.__init__">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.__init__">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.__init__">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.__init__">(tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError.__init__">(tensorrt_llm.llmapi.RequestError method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.__init__">(tensorrt_llm.llmapi.RequestOutput method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.__init__">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.__init__">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.__init__">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.__init__">(tensorrt_llm.llmapi.SamplingParams method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.__init__">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.__init__">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.__init__">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.__init__">(tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.__init__">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.__init__">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.abort">abort() (tensorrt_llm.llmapi.MpiCommSession method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.abort">(tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.aborted">aborted() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.abs">abs() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.abs">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.acceptance_length_threshold">acceptance_length_threshold (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.acceptance_length_threshold">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.acceptance_window">acceptance_window (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.acceptance_window">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.activation">activation() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNorm">AdaLayerNorm (class in tensorrt_llm.layers.normalization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormContinuous">AdaLayerNormContinuous (class in tensorrt_llm.layers.normalization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormZero">AdaLayerNormZero (class in tensorrt_llm.layers.normalization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle">AdaLayerNormZeroSingle (class in tensorrt_llm.layers.normalization)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.adapter_id">adapter_id (tensorrt_llm.llmapi.LoRARequest property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.add">add() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional.add_input">add_input() (tensorrt_llm.functional.Conditional method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError.add_note">add_note() (tensorrt_llm.llmapi.RequestError method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional.add_output">add_output() (tensorrt_llm.functional.Conditional method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.add_sequence">add_sequence() (tensorrt_llm.runtime.KVCacheManager method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.add_special_tokens">add_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.additional_context_outputs">additional_context_outputs (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.additional_generation_outputs">additional_generation_outputs (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.additional_model_outputs">additional_model_outputs (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.algorithm">algorithm (tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.algorithm">(tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi">alibi (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale">alibi_with_scale (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allgather">allgather() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allreduce">allreduce() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.allreduce_strategy">allreduce_strategy (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp">AllReduceFusionOp (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams">AllReduceParams (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy">AllReduceStrategy (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.apply_batched_logits_processor">apply_batched_logits_processor (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling">apply_llama3_scaling() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb">apply_rotary_pos_emb() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm">apply_rotary_pos_emb_chatglm() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm">apply_rotary_pos_emb_cogvlm() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.arange">arange() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.aresult">aresult() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.argmax">argmax() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError.args">args (tensorrt_llm.llmapi.RequestError attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.assert_valid_quant_algo">assert_valid_quant_algo() (tensorrt_llm.models.GemmaForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.assertion">assertion() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention">Attention (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.attention_dp_config">attention_dp_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.attention_dp_events_gather_period_ms">attention_dp_events_gather_period_ms (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig">AttentionDpConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.Config">AttentionDpConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionMaskParams">AttentionMaskParams (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType">AttentionMaskType (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams">AttentionParams (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.attn_backend">attn_backend (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.attn_processors">attn_processors (tensorrt_llm.models.SD3Transformer2DModel property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.audio_engine_dir">audio_engine_dir (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.AUTO">AUTO (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.AUTO">(tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig">AutoDecodingConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.Config">AutoDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.avg_pool2d">avg_pool2d() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d">AvgPool2d (class in tensorrt_llm.layers.pooling)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.axes">axes (tensorrt_llm.functional.SliceInputType attribute)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="B">B</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.backend">backend (tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.backend">(tensorrt_llm.llmapi.MoeConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.backend">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.backend">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.bad">bad (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.bad_token_ids">bad_token_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.bad_words_list">bad_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM">BaichuanForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.batch_size">batch_size (tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.batch_sizes">batch_sizes (tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_max_tokens_ratio">batch_wait_max_tokens_ratio (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_timeout_iters">batch_wait_timeout_iters (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_timeout_ms">batch_wait_timeout_ms (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.batched_logits_processor">batched_logits_processor (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.batched_logits_processor">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.batching_type">batching_type (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.batching_wait_iters">batching_wait_iters (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType">BatchingType (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate">beam_search_diversity_rate (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.beam_width_array">beam_width_array (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.BEGIN_THINKING_PHASE_TOKEN">BEGIN_THINKING_PHASE_TOKEN (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.bert_attention">bert_attention() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.bert_attention_plugin">bert_attention_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.bert_context_fmha_fp32_acc">bert_context_fmha_fp32_acc (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention">BertAttention (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering">BertForQuestionAnswering (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification">BertForSequenceClassification (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel">BertModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.best_of">best_of (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectional">bidirectional (tensorrt_llm.functional.AttentionMaskType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectionalglm">bidirectionalglm (tensorrt_llm.functional.AttentionMaskType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.blocksparse">blocksparse (tensorrt_llm.functional.AttentionMaskType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BlockSparseAttnParams">BlockSparseAttnParams (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomForCausalLM">BloomForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel">BloomModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.broadcast_helper">broadcast_helper() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.buffer_allocated">buffer_allocated (tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.build_config">build_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.build_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig">BuildCacheConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig">BuildConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="C">C</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.cache_root">cache_root (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id13">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.cache_transceiver_config">cache_transceiver_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.cache_transceiver_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig">CacheTransceiverConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.Config">CacheTransceiverConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource">calculate_speculative_resource() (tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_batch_size">calib_batch_size (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_batches">calib_batches (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.calib_config">calib_config (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_dataset">calib_dataset (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length">calib_max_seq_length (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig">CalibConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.Config">CalibConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy">capacity_scheduler_policy (tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy">CapacitySchedulerPolicy (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.capitalize">capitalize() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.capitalize">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.capitalize">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.capitalize">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.capture_num_tokens">capture_num_tokens (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.casefold">casefold() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.casefold">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.casefold">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.casefold">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast">Cast (class in tensorrt_llm.layers.cast)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cast">cast() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.cast">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.categorical_sample">categorical_sample() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.causal">causal (tensorrt_llm.functional.AttentionMaskType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.center">center() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.center">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.center">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.center">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.chatglm">chatglm (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig">ChatGLMConfig (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM">ChatGLMForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ChatGLMGenerationSession">ChatGLMGenerationSession (class in tensorrt_llm.runtime)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel">ChatGLMModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.check_config">check_config() (tensorrt_llm.models.DecoderModel method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.check_config">(tensorrt_llm.models.DiT method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.check_config">(tensorrt_llm.models.EncoderModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.check_config">(tensorrt_llm.models.FalconForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM.check_config">(tensorrt_llm.models.MPTForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM.check_config">(tensorrt_llm.models.OPTForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.check_config">(tensorrt_llm.models.PhiForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.check_config">(tensorrt_llm.models.PretrainedModel method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.check_eagle_choices">check_eagle_choices() (tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.checkpoint_format">checkpoint_format (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.checkpoint_loader">checkpoint_loader (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.choices">choices() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.chunk">chunk() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.ckpt_source">ckpt_source (tensorrt_llm.llmapi.LoRARequest property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.clamp_val">clamp_val (tensorrt_llm.llmapi.QuantConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.clear_logprob_params">clear_logprob_params() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.client_id">client_id (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.clip">clip() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CLIPVisionTransformer">CLIPVisionTransformer (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.CogVLMAttention">CogVLMAttention (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMConfig">CogVLMConfig (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM">CogVLMForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM">CohereForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.collect_and_bias">collect_and_bias() (tensorrt_llm.layers.linear.Linear method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.collect_and_bias">(tensorrt_llm.layers.linear.LinearBase method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.collect_and_bias">(tensorrt_llm.layers.linear.RowLinear method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.ColumnLinear">ColumnLinear (in module tensorrt_llm.layers.linear)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings">CombinedTimestepLabelEmbeddings (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings">CombinedTimestepTextProjEmbeddings (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput">CompletionOutput (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.compute_relative_bias">compute_relative_bias() (in module tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.concat">concat() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional">Conditional (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.config_class">config_class (tensorrt_llm.models.BaichuanForCausalLM attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.config_class">(tensorrt_llm.models.ChatGLMForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.config_class">(tensorrt_llm.models.CogVLMForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM.config_class">(tensorrt_llm.models.CohereForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxForCausalLM.config_class">(tensorrt_llm.models.DbrxForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM.config_class">(tensorrt_llm.models.DeepseekForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekV2ForCausalLM.config_class">(tensorrt_llm.models.DeepseekV2ForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.config_class">(tensorrt_llm.models.EagleForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.config_class">(tensorrt_llm.models.FalconForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.config_class">(tensorrt_llm.models.GemmaForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.config_class">(tensorrt_llm.models.GPTForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM.config_class">(tensorrt_llm.models.GPTJForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.config_class">(tensorrt_llm.models.LLaMAForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.config_class">(tensorrt_llm.models.MambaForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.config_class">(tensorrt_llm.models.MedusaForCausalLm attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM.config_class">(tensorrt_llm.models.MLLaMAForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.config_class">(tensorrt_llm.models.Phi3ForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.config_class">(tensorrt_llm.models.PhiForCausalLM attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.config_class">(tensorrt_llm.models.SD3Transformer2DModel attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant">constant() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant_to_tensor_">constant_to_tensor_() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constants_to_tensors_">constants_to_tensors_() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.construct">construct() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.construct">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.construct">(tensorrt_llm.llmapi.BuildConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.construct">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.construct">(tensorrt_llm.llmapi.CalibConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.construct">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.construct">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.construct">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.construct">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.construct">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.construct">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.construct">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.construct">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.construct">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.construct">(tensorrt_llm.llmapi.MoeConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.construct">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.construct">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.construct">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.construct">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.construct">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.construct">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.construct">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.context">context (tensorrt_llm.runtime.Session property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy">context_chunking_policy (tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.context_fmha">context_fmha (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.context_fmha_type">context_fmha_type (tensorrt_llm.plugin.PluginConfig property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.context_logits">context_logits (tensorrt_llm.llmapi.RequestOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id6">(tensorrt_llm.llmapi.RequestOutput property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.context_mem_size">context_mem_size (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.context_mem_size">(tensorrt_llm.runtime.Session property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.context_parallel_size">context_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.context_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy">ContextChunkingPolicy (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d">Conv1d (class in tensorrt_llm.layers.conv)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv1d">conv1d() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d">Conv2d (class in tensorrt_llm.layers.conv)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv2d">conv2d() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv3d">Conv3d (class in tensorrt_llm.layers.conv)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv3d">conv3d() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.conv_kernel">conv_kernel (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.conv_kernel">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv_transpose2d">conv_transpose2d() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.convert_enable_disable">convert_enable_disable() (tensorrt_llm.plugin.PluginConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.convert_load_format">convert_load_format() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d">ConvTranspose2d (class in tensorrt_llm.layers.conv)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.copy">copy() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.copy">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.copy">(tensorrt_llm.llmapi.BuildConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.copy">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.copy">(tensorrt_llm.llmapi.CalibConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.copy">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.copy">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.copy">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.copy">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.copy">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.copy">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.copy">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.copy">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.copy">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.copy">(tensorrt_llm.llmapi.MoeConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.copy">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.copy">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.copy">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.copy">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.copy">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.copy">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.copy">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.copy_on_partial_reuse">copy_on_partial_reuse (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cos">cos() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.count">count() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.count">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.count">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.count">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.count">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.cp_config">cp_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.cp_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cp_split_plugin">cp_split_plugin() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.cpp_e2e">cpp_e2e (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.cpp_llm_only">cpp_llm_only (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.create_allreduce_plugin">create_allreduce_plugin() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.create_attention_const_params">create_attention_const_params() (tensorrt_llm.layers.attention.Attention static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight">create_fake_weight() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.create_runtime_defaults">create_runtime_defaults() (tensorrt_llm.models.PretrainedConfig static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions">create_sinusoidal_positions() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin">create_sinusoidal_positions_for_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin">create_sinusoidal_positions_for_cogvlm_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope">create_sinusoidal_positions_long_rope() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope_for_attention_plugin">create_sinusoidal_positions_long_rope_for_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_yarn">create_sinusoidal_positions_yarn() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.SD3PatchEmbed.cropped_pos_embed">cropped_pos_embed() (tensorrt_llm.layers.embedding.SD3PatchEmbed method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cross_attention">cross_attention (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.cross_attention">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction">cross_kv_cache_fraction (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.ctx_request_id">ctx_request_id (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.cuda_graph_cache_size">cuda_graph_cache_size (tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.cuda_graph_config">cuda_graph_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.cuda_graph_mode">cuda_graph_mode (tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_graph_mode">(tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_stream_guard">cuda_stream_guard() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cuda_stream_sync">cuda_stream_sync() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig">CudaGraphConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.Config">CudaGraphConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cumsum">cumsum() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.cumulative_logprob">cumulative_logprob (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.custom_mask">custom_mask (tensorrt_llm.functional.AttentionMaskType attribute)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="D">D</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.data">data (tensorrt_llm.functional.SliceInputType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxConfig">DbrxConfig (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxForCausalLM">DbrxForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_mode">debug_mode (tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save">debug_tensors_to_save (tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode">decode() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_batch">decode_batch() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.decode_duration_ms">decode_duration_ms (tensorrt_llm.llmapi.KvCacheRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_regular">decode_regular() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.decode_retention_priority">decode_retention_priority (tensorrt_llm.llmapi.KvCacheRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_stream">decode_stream() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.decode_words_list">decode_words_list() (in module tensorrt_llm.runtime)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel">DecoderModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.decoding_config">decoding_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.decoding_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.decoding_type">decoding_type (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.decoding_type">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.decoding_type">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.decoding_type">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.decoding_type">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.decoding_type">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.decoding_type">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.decoding_type">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.decoding_type">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM">DeepseekForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig">DeepSeekSparseAttentionConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.Config">DeepSeekSparseAttentionConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention">DeepseekV2Attention (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekV2ForCausalLM">DeepseekV2ForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.default_plugin_config">default_plugin_config() (tensorrt_llm.models.CogVLMForCausalLM method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.default_record_creator">default_record_creator() (tensorrt_llm.llmapi.RequestOutput.PostprocWorker static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.deferred">deferred (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.detokenize">detokenize (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.device">device (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.device">(tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.dict">dict() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.dict">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.dict">(tensorrt_llm.llmapi.BuildConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.dict">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.dict">(tensorrt_llm.llmapi.CalibConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.dict">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.dict">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.dict">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.dict">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.dict">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.dict">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.dict">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.dict">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.dict">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.dict">(tensorrt_llm.llmapi.MoeConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.dict">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.dict">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.dict">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.dict">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.dict">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.dict">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.dict">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DiffusersAttention">DiffusersAttention (class in tensorrt_llm.layers.attention)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.DimRange">DimRange (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.directory">directory (tensorrt_llm.llmapi.KvCacheRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType.disable">disable (tensorrt_llm.functional.SideStreamIDType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.disable_finalize_fusion">disable_finalize_fusion (tensorrt_llm.llmapi.MoeConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.disable_forward_chunking">disable_forward_chunking() (tensorrt_llm.models.SD3Transformer2DModel method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.disable_overlap_scheduler">disable_overlap_scheduler (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.disaggregated_params">disaggregated_params (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.disaggregated_params">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams">DisaggregatedParams (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT">DiT (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.div">div() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.do_tracing">do_tracing() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.dora_plugin">dora_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.dora_plugin">dora_plugin() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.draft_tokens">draft_tokens (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.DRAFT_TOKENS_EXTERNAL">DRAFT_TOKENS_EXTERNAL (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.drafter">drafter (tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig">DraftTargetDecodingConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.Config">DraftTargetDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.dry_run">dry_run (tensorrt_llm.llmapi.BuildConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.dtype">dtype (tensorrt_llm.functional.Tensor property)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.dtype">(tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.dtype">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.dtype">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.dtype">(tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dtype">(tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.dtype">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.dtype">(tensorrt_llm.runtime.ModelRunner property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.dtype">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.dtype">(tensorrt_llm.runtime.TensorInfo attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dump_debug_buffers">dump_debug_buffers() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.duration_ms">duration_ms (tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.dynamic">dynamic (tensorrt_llm.functional.RotaryScalingType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config">dynamic_batch_config (tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.dynamic_batch_moving_average_window">dynamic_batch_moving_average_window (tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.dynamic_tree_max_topK">dynamic_tree_max_topK (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig">DynamicBatchConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.Config">DynamicBatchConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="E">E</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.EAGLE">EAGLE (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.eagle3_layers_to_capture">eagle3_layers_to_capture (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.eagle3_layers_to_capture">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.eagle3_one_model">eagle3_one_model (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.eagle_choices">eagle_choices (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.eagle_choices">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig">EagleDecodingConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.Config">EagleDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM">EagleForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.early_stop_criteria">early_stop_criteria() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.early_stopping">early_stopping (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.early_stopping">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.einsum">einsum() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.elementwise_binary">elementwise_binary() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding">Embedding (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.embedding">embedding() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.embedding_bias">embedding_bias (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.embedding_parallel_mode">embedding_parallel_mode (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_attention_dp">enable_attention_dp (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_attention_dp">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_autotuner">enable_autotuner (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.enable_balance">enable_balance (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.enable_batch_size_tuning">enable_batch_size_tuning (tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse">enable_block_reuse (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_build_cache">enable_build_cache (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_chunked_prefill">enable_chunked_prefill (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_chunked_prefill">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.enable_context_fmha_fp32_acc">enable_context_fmha_fp32_acc (tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.enable_debug_output">enable_debug_output (tensorrt_llm.llmapi.BuildConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.enable_forward_chunking">enable_forward_chunking() (tensorrt_llm.models.SD3Transformer2DModel method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.enable_fullgraph">enable_fullgraph (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.enable_inductor">enable_inductor (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_iter_perf_stats">enable_iter_perf_stats (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_iter_req_stats">enable_iter_req_stats (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_layerwise_nvtx_marker">enable_layerwise_nvtx_marker (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_lm_head_tp_in_adp">enable_lm_head_tp_in_adp (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_lm_head_tp_in_adp">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_lora">enable_lora (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_lora">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.enable_max_num_tokens_tuning">enable_max_num_tokens_tuning (tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_min_latency">enable_min_latency (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.enable_padding">enable_padding (tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.enable_paged_kv_cache">enable_paged_kv_cache() (tensorrt_llm.plugin.PluginConfig method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.enable_partial_reuse">enable_partial_reuse (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.enable_piecewise_cuda_graph">enable_piecewise_cuda_graph (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_prompt_adapter">enable_prompt_adapter (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.enable_sleep">enable_sleep (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.enable_tqdm">enable_tqdm (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.enable_userbuffers">enable_userbuffers (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner">EncDecModelRunner (class in tensorrt_llm.runtime)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.encode">encode() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.encode">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.encode">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.encode">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.encoder_run">encoder_run() (tensorrt_llm.runtime.EncDecModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel">EncoderModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.end_id">end_id (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.end_id">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.END_THINKING_PHASE_TOKEN">END_THINKING_PHASE_TOKEN (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.endswith">endswith() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.endswith">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.endswith">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.endswith">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.engine">engine (tensorrt_llm.runtime.Session property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.engine_inspector">engine_inspector (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.eq">eq() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.EQUAL_PROGRESS">EQUAL_PROGRESS (tensorrt_llm.llmapi.ContextChunkingPolicy attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.error">error (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size">event_buffer_max_size (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output">exclude_input_from_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.exclude_modules">exclude_modules (tensorrt_llm.llmapi.QuantConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.exp">exp() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand">expand() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims">expand_dims() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims_like">expand_dims_like() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_mask">expand_mask() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.expandtabs">expandtabs() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.expandtabs">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.expandtabs">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.expandtabs">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.EXPLICIT_DRAFT_TOKENS">EXPLICIT_DRAFT_TOKENS (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.extended_runtime_perf_knob_config">extended_runtime_perf_knob_config (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig">ExtendedRuntimePerfKnobConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.Config">ExtendedRuntimePerfKnobConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.Config.extra">extra (tensorrt_llm.llmapi.AttentionDpConfig.Config attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.Config.extra">(tensorrt_llm.llmapi.AutoDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.Config.extra">(tensorrt_llm.llmapi.CacheTransceiverConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.Config.extra">(tensorrt_llm.llmapi.CalibConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.Config.extra">(tensorrt_llm.llmapi.CudaGraphConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.Config.extra">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.Config.extra">(tensorrt_llm.llmapi.DraftTargetDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.Config.extra">(tensorrt_llm.llmapi.DynamicBatchConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.Config.extra">(tensorrt_llm.llmapi.EagleDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.Config.extra">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.Config.extra">(tensorrt_llm.llmapi.KvCacheConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.Config.extra">(tensorrt_llm.llmapi.LookaheadDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.Config.extra">(tensorrt_llm.llmapi.MedusaDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.Config.extra">(tensorrt_llm.llmapi.MoeConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.Config.extra">(tensorrt_llm.llmapi.MTPDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.Config.extra">(tensorrt_llm.llmapi.NGramDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.Config.extra">(tensorrt_llm.llmapi.RocketSparseAttentionConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.Config.extra">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.Config.extra">(tensorrt_llm.llmapi.SchedulerConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.Config.extra">(tensorrt_llm.llmapi.TorchCompileConfig.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.Config.extra">(tensorrt_llm.llmapi.TorchLlmArgs.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.Config.extra">(tensorrt_llm.llmapi.TrtLlmArgs.Config attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.Config.extra">(tensorrt_llm.llmapi.UserProvidedDecodingConfig.Config attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.extra_resource_managers">extra_resource_managers (tensorrt_llm.llmapi.TorchLlmArgs property)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="F">F</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.fail_fast_on_attention_window_too_large">fail_fast_on_attention_window_too_large (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.fail_fast_on_attention_window_too_large">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig">FalconConfig (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM">FalconForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel">FalconModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.fast_build">fast_build (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate">fc_gate() (tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.fc_gate_dora">fc_gate_dora() (in module tensorrt_llm.layers.mlp)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.fc_gate_lora">fc_gate_lora() (in module tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate_plugin">fc_gate_plugin() (tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.field_name">field_name (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.field_name">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.file_prefix">file_prefix (tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_long_rope">fill_attention_const_params_for_long_rope() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_rope">fill_attention_const_params_for_rope() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.fill_attention_params">fill_attention_params() (tensorrt_llm.layers.attention.Attention static method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list">fill_none_tensor_list() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.fill_value">fill_value (tensorrt_llm.functional.SliceInputType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.filter_medusa_logits">filter_medusa_logits() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.finalize_decoder">finalize_decoder() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.find">find() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.find">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.find">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.find">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.find_best_medusa_path">find_best_medusa_path() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.finish_reason">finish_reason (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.finished">finished (tensorrt_llm.llmapi.RequestOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id7">(tensorrt_llm.llmapi.RequestOutput property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.FIRST_COME_FIRST_SERVED">FIRST_COME_FIRST_SERVED (tensorrt_llm.llmapi.ContextChunkingPolicy attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.first_gen_tokens">first_gen_tokens (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.first_layer">first_layer (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.flatten">flatten() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.flatten">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.flip">flip() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.floordiv">floordiv() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="_cpp_gen/runtime.html#c.FMT_DIM">FMT_DIM (C macro)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.for_each_rank">for_each_rank() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.force_dynamic_quantization">force_dynamic_quantization (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.force_num_profiles">force_num_profiles (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.format">format() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.format">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.format">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.format">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.format_map">format_map() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.format_map">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.format_map">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.format_map">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish.forward">forward() (tensorrt_llm.layers.activation.Mish method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.forward">(tensorrt_llm.layers.attention.Attention method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention.forward">(tensorrt_llm.layers.attention.BertAttention method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.CogVLMAttention.forward">(tensorrt_llm.layers.attention.CogVLMAttention method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention.forward">(tensorrt_llm.layers.attention.DeepseekV2Attention method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DiffusersAttention.forward">(tensorrt_llm.layers.attention.DiffusersAttention method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast.forward">(tensorrt_llm.layers.cast.Cast method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d.forward">(tensorrt_llm.layers.conv.Conv1d method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d.forward">(tensorrt_llm.layers.conv.Conv2d method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv3d.forward">(tensorrt_llm.layers.conv.Conv3d method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d.forward">(tensorrt_llm.layers.conv.ConvTranspose2d method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings.forward">(tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings.forward">(tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.forward">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.LabelEmbedding.forward">(tensorrt_llm.layers.embedding.LabelEmbedding method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PixArtAlphaTextProjection.forward">(tensorrt_llm.layers.embedding.PixArtAlphaTextProjection method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward">(tensorrt_llm.layers.embedding.PromptTuningEmbedding method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.SD3PatchEmbed.forward">(tensorrt_llm.layers.embedding.SD3PatchEmbed method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.TimestepEmbedding.forward">(tensorrt_llm.layers.embedding.TimestepEmbedding method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Timesteps.forward">(tensorrt_llm.layers.embedding.Timesteps method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.forward">(tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.forward">(tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP.forward">(tensorrt_llm.layers.mlp.GatedMLP method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearActivation.forward">(tensorrt_llm.layers.mlp.LinearActivation method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearApproximateGELU.forward">(tensorrt_llm.layers.mlp.LinearApproximateGELU method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearGEGLU.forward">(tensorrt_llm.layers.mlp.LinearGEGLU method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearGELU.forward">(tensorrt_llm.layers.mlp.LinearGELU method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearSwiGLU.forward">(tensorrt_llm.layers.mlp.LinearSwiGLU method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP.forward">(tensorrt_llm.layers.mlp.MLP method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNorm.forward">(tensorrt_llm.layers.normalization.AdaLayerNorm method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormContinuous.forward">(tensorrt_llm.layers.normalization.AdaLayerNormContinuous method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormZero.forward">(tensorrt_llm.layers.normalization.AdaLayerNormZero method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle.forward">(tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm.forward">(tensorrt_llm.layers.normalization.GroupNorm method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm.forward">(tensorrt_llm.layers.normalization.LayerNorm method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm.forward">(tensorrt_llm.layers.normalization.RmsNorm method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX.forward">(tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d.forward">(tensorrt_llm.layers.pooling.AvgPool2d method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering.forward">(tensorrt_llm.models.BertForQuestionAnswering method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification.forward">(tensorrt_llm.models.BertForSequenceClassification method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel.forward">(tensorrt_llm.models.BertModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel.forward">(tensorrt_llm.models.BloomModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel.forward">(tensorrt_llm.models.ChatGLMModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CLIPVisionTransformer.forward">(tensorrt_llm.models.CLIPVisionTransformer method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.forward">(tensorrt_llm.models.DecoderModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward">(tensorrt_llm.models.DiT method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.forward">(tensorrt_llm.models.EagleForCausalLM method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.forward">(tensorrt_llm.models.EncoderModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel.forward">(tensorrt_llm.models.FalconModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel.forward">(tensorrt_llm.models.GPTJModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel.forward">(tensorrt_llm.models.GPTModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel.forward">(tensorrt_llm.models.GPTNeoXModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel.forward">(tensorrt_llm.models.LLaMAModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionWrapper.forward">(tensorrt_llm.models.LlavaNextVisionWrapper method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.forward">(tensorrt_llm.models.MambaForCausalLM method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM.forward">(tensorrt_llm.models.MLLaMAForCausalLM method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel.forward">(tensorrt_llm.models.MPTModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel.forward">(tensorrt_llm.models.OPTModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3Model.forward">(tensorrt_llm.models.Phi3Model method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel.forward">(tensorrt_llm.models.PhiModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.forward">(tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.forward">(tensorrt_llm.models.SD3Transformer2DModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.forward">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward_with_cfg">forward_with_cfg() (tensorrt_llm.models.DiT method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward_without_cfg">forward_without_cfg() (tensorrt_llm.models.DiT method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.FP8">FP8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.FP8_BLOCK_SCALES">FP8_BLOCK_SCALES (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN">FP8_PER_CHANNEL_PER_TOKEN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.fp8_rowwise_gemm_plugin">fp8_rowwise_gemm_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction">free_gpu_memory_fraction (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.frequency_penalty">frequency_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.frequency_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.from_arguments">from_arguments() (tensorrt_llm.models.SpeculativeDecodingMode static method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.from_arguments">(tensorrt_llm.plugin.PluginConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_checkpoint">from_checkpoint() (tensorrt_llm.models.PretrainedConfig class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_checkpoint">(tensorrt_llm.models.PretrainedModel class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_config">from_config() (tensorrt_llm.models.PretrainedModel class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.from_dict">from_dict() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.from_dict">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.from_dict">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.from_dict">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.from_dict">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.from_dict">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.from_dict">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.from_dict">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.from_dict">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.from_dict">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.from_dict">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.from_dict">(tensorrt_llm.llmapi.QuantConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.from_dict">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.from_dict">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.from_dict">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_dict">(tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_dir">from_dir() (tensorrt_llm.runtime.ModelRunner class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.from_dir">(tensorrt_llm.runtime.ModelRunnerCpp class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.from_engine">from_engine() (tensorrt_llm.runtime.EncDecModelRunner class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_engine">(tensorrt_llm.runtime.ModelRunner class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_engine">(tensorrt_llm.runtime.Session static method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.from_hugging_face">from_hugging_face() (tensorrt_llm.models.BaichuanForCausalLM class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig.from_hugging_face">(tensorrt_llm.models.ChatGLMConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.from_hugging_face">(tensorrt_llm.models.ChatGLMForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.from_hugging_face">(tensorrt_llm.models.CogVLMForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM.from_hugging_face">(tensorrt_llm.models.CohereForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM.from_hugging_face">(tensorrt_llm.models.DeepseekForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekV2ForCausalLM.from_hugging_face">(tensorrt_llm.models.DeepseekV2ForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.from_hugging_face">(tensorrt_llm.models.EagleForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig.from_hugging_face">(tensorrt_llm.models.FalconConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.from_hugging_face">(tensorrt_llm.models.FalconForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.from_hugging_face">(tensorrt_llm.models.GemmaConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.from_hugging_face">(tensorrt_llm.models.GemmaForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.from_hugging_face">(tensorrt_llm.models.GPTConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.from_hugging_face">(tensorrt_llm.models.GPTForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig.from_hugging_face">(tensorrt_llm.models.GPTJConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM.from_hugging_face">(tensorrt_llm.models.GPTJForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.from_hugging_face">(tensorrt_llm.models.LLaMAConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionConfig.from_hugging_face">(tensorrt_llm.models.LlavaNextVisionConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionWrapper.from_hugging_face">(tensorrt_llm.models.LlavaNextVisionWrapper class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.from_hugging_face">(tensorrt_llm.models.MambaForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig.from_hugging_face">(tensorrt_llm.models.MedusaConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.from_hugging_face">(tensorrt_llm.models.MedusaForCausalLm class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM.from_hugging_face">(tensorrt_llm.models.MLLaMAForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.from_hugging_face">(tensorrt_llm.models.Phi3ForCausalLM class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.from_hugging_face">(tensorrt_llm.models.PhiForCausalLM class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.from_json_file">from_json_file() (tensorrt_llm.llmapi.BuildConfig class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_json_file">(tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.from_kwargs">from_kwargs() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.from_kwargs">(tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.from_meta_ckpt">from_meta_ckpt() (tensorrt_llm.models.LLaMAConfig class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_meta_ckpt">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.from_model_config_cpp">from_model_config_cpp() (tensorrt_llm.runtime.ModelConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.from_nemo">from_nemo() (tensorrt_llm.models.GPTConfig class method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.from_nemo">(tensorrt_llm.models.GPTForCausalLM class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.from_orm">from_orm() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.from_orm">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.from_orm">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.from_orm">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.from_orm">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.from_orm">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.from_orm">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.from_orm">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.from_orm">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.from_orm">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.from_orm">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.from_orm">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.from_orm">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.from_orm">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.from_orm">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.from_orm">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.from_orm">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.from_orm">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.from_orm">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.from_orm">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.from_orm">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.from_orm">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.from_pretrained">from_pretrained() (tensorrt_llm.models.SD3Transformer2DModel class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.from_pybind">from_pybind() (tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.from_pybind">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.from_pybind">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.from_pybind">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.from_pybind">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.from_pybind">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_serialized_engine">from_serialized_engine() (tensorrt_llm.runtime.Session static method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.from_string">from_string() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.from_string">(tensorrt_llm.functional.RotaryScalingType static method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.fuse_fp4_quant">fuse_fp4_quant (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.fuse_qkv_projections">fuse_qkv_projections() (tensorrt_llm.models.SD3Transformer2DModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP">FusedGatedMLP (class in tensorrt_llm.layers.mlp)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.FusedGatedMLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="G">G</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.garbage_collection_gen0_threshold">garbage_collection_gen0_threshold (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP">GatedMLP (class in tensorrt_llm.layers.mlp)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.GatedMLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather">gather() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.gather_context_logits">gather_context_logits (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_context_logits">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_context_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_context_logits">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_context_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.gather_generation_logits">gather_generation_logits (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.gather_generation_logits">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.gather_generation_logits">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_generation_logits">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_generation_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_generation_logits">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_generation_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather_last_token_logits">gather_last_token_logits() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather_nd">gather_nd() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gegelu">gegelu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.geglu">geglu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gelu">gelu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gemm_allreduce">gemm_allreduce() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.gemm_allreduce_plugin">gemm_allreduce_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gemm_allreduce_plugin">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gemm_allreduce_plugin">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.gemm_plugin">gemm_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gemm_swiglu">gemm_swiglu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.gemm_swiglu_plugin">gemm_swiglu_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.GEMMA2_ADDED_FIELDS">GEMMA2_ADDED_FIELDS (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.gemma2_config">gemma2_config() (tensorrt_llm.models.GemmaConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.GEMMA3_ADDED_FIELDS">GEMMA3_ADDED_FIELDS (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.gemma3_config">gemma3_config() (tensorrt_llm.models.GemmaConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.GEMMA_ADDED_FIELDS">GEMMA_ADDED_FIELDS (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig">GemmaConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM">GemmaForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.generate">generate() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.generate">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.generate">(tensorrt_llm.runtime.EncDecModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.generate">(tensorrt_llm.runtime.ModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.generate">(tensorrt_llm.runtime.ModelRunnerCpp method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.generate">(tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate">(tensorrt_llm.runtime.QWenForCausalLMGenerationSession method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_biases">generate_alibi_biases() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_slopes">generate_alibi_slopes() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.generate_async">generate_async() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.generate_async">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_logn_scaling">generate_logn_scaling() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.generation_logits">generation_logits (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence">GenerationSequence (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession">GenerationSession (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.get_1d_sincos_pos_embed_from_grid">get_1d_sincos_pos_embed_from_grid() (in module tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed">get_2d_sincos_pos_embed() (in module tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed_from_grid">get_2d_sincos_pos_embed_from_grid() (in module tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.get_audio_features">get_audio_features() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_batch_idx">get_batch_idx() (tensorrt_llm.runtime.GenerationSequence method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.get_block_offsets">get_block_offsets() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.get_comm">get_comm() (tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.get_config_group">get_config_group() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.get_context_phase_params">get_context_phase_params() (tensorrt_llm.llmapi.DisaggregatedParams method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.get_executor_config">get_executor_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value">get_first_past_key_value() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.get_hf_config">get_hf_config() (tensorrt_llm.models.GemmaConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.get_kv_cache_events">get_kv_cache_events() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.get_kv_cache_events">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.get_kv_cache_events_async">get_kv_cache_events_async() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.get_kv_cache_events_async">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens">get_next_medusa_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.get_num_heads_kv">get_num_heads_kv() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_parent">get_parent() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.get_pybind_enum_fields">get_pybind_enum_fields() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.get_pybind_enum_fields">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.get_pybind_enum_fields">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.get_pybind_enum_fields">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.get_pybind_enum_fields">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.get_pybind_enum_fields">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.get_pybind_variable_fields">get_pybind_variable_fields() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.get_pybind_variable_fields">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.get_pybind_variable_fields">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.get_pybind_variable_fields">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.get_pybind_variable_fields">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.get_pybind_variable_fields">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.get_pytorch_backend_config">get_pytorch_backend_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.get_request_type">get_request_type() (tensorrt_llm.llmapi.DisaggregatedParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.get_rope_index">get_rope_index() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.get_runtime_sizes">get_runtime_sizes() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.get_runtime_sizes">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_seq_idx">get_seq_idx() (tensorrt_llm.runtime.GenerationSequence method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.get_stats">get_stats() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.get_stats">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.get_stats_async">get_stats_async() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.get_stats_async">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.get_timestep_embedding">get_timestep_embedding() (in module tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_users">get_users() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.get_visual_features">get_visual_features() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.get_weight">get_weight() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gpt_attention">gpt_attention() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.gpt_attention_plugin">gpt_attention_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig">GPTConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM">GPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig">GPTJConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM">GPTJForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel">GPTJModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel">GPTModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXForCausalLM">GPTNeoXForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel">GPTNeoXModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gpu_weights_percent">gpu_weights_percent (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.gpus_per_node">gpus_per_node (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.gpus_per_node">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.grammar">grammar (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.greedy_sampling">greedy_sampling (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.group_norm">group_norm() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.group_size">group_size (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm">GroupNorm (class in tensorrt_llm.layers.normalization)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.GroupNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gt">gt() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT">GUARANTEED_NO_EVICT (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.guided_decoding">guided_decoding (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.guided_decoding_backend">guided_decoding_backend (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.guided_decoding_backend">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams">GuidedDecodingParams (class in tensorrt_llm.llmapi)</a>
</li>
</ul></td>
</tr></table>

<h2 id="H">H</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.handle_per_step">handle_per_step() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_affine">has_affine() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_bias">has_bias() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.has_config_group">has_config_group() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_position_embedding">has_position_embedding (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_position_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_scale">has_scale() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_token_type_embedding">has_token_type_embedding (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_token_type_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.has_zero_point">has_zero_point (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.head_size">head_size (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.head_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.hidden_size">hidden_size (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.hidden_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.hidden_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.hidden_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.host_cache_size">host_cache_size (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
</ul></td>
</tr></table>

<h2 id="I">I</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.identity">identity() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.identity_plugin">identity_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.ignore_eos">ignore_eos (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output">include_stop_str_in_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.index">index (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.index">index() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.index">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.index">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.index">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.index">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_head_dim">index_head_dim (tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_n_heads">index_n_heads (tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.index_select">index_select() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_topk">index_topk (tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.indexer_max_chunk_size">indexer_max_chunk_size (tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.infer_shapes">infer_shapes() (tensorrt_llm.runtime.Session method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.INFLIGHT">INFLIGHT (tensorrt_llm.llmapi.BatchingType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_audio_encoder">init_audio_encoder() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.init_backend">init_backend() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.init_build_config">init_build_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.init_build_config">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.init_calib_config">init_calib_config() (tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_image_encoder">init_image_encoder() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_llm">init_llm() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_processor">init_processor() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_tokenizer">init_tokenizer() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.input_timing_cache">input_timing_cache (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.INT8">INT8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.int_clip">int_clip() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.interpolate">interpolate() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_alibi">is_alibi() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.is_comm_session">is_comm_session() (tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.is_context_fmha_enabled">is_context_fmha_enabled() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_deferred">is_deferred() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_dynamic">is_dynamic() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.is_final">is_final (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.is_gated_activation">is_gated_activation() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.is_gemma_2">is_gemma_2 (tensorrt_llm.models.GemmaConfig property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.is_gemma_3">is_gemma_3 (tensorrt_llm.models.GemmaConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.is_keep_all">is_keep_all (tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.is_linear_tree">is_linear_tree (tensorrt_llm.llmapi.EagleDecodingConfig property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.is_medusa_mode">is_medusa_mode (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.is_module_excluded_from_quantization">is_module_excluded_from_quantization() (tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_mrope">is_mrope() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.is_public_pool">is_public_pool (tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.is_redrafter_mode">is_redrafter_mode (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_rope">is_rope() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_trt_wrapper">is_trt_wrapper() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.is_use_oldest">is_use_oldest (tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MoEAllReduceParams.is_valid">is_valid() (tensorrt_llm.functional.MoEAllReduceParams method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid">(tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid">(tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn">is_valid_cross_attn() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isalnum">isalnum() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isalnum">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isalnum">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isalnum">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isalpha">isalpha() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isalpha">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isalpha">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isalpha">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isascii">isascii() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isascii">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isascii">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isascii">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isdecimal">isdecimal() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isdecimal">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isdecimal">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isdecimal">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isdigit">isdigit() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isdigit">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isdigit">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isdigit">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isidentifier">isidentifier() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isidentifier">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isidentifier">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isidentifier">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.islower">islower() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.islower">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.islower">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.islower">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isnumeric">isnumeric() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isnumeric">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isnumeric">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isnumeric">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isprintable">isprintable() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isprintable">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isprintable">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isprintable">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isspace">isspace() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isspace">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isspace">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isspace">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.istitle">istitle() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.istitle">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.istitle">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.istitle">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.isupper">isupper() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.isupper">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.isupper">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.isupper">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.iter_stats_max_iterations">iter_stats_max_iterations (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.iter_stats_max_iterations">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="J">J</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.join">join() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.join">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.join">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.join">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DiffusersAttention.joint_attn_forward">joint_attn_forward() (tensorrt_llm.layers.attention.DiffusersAttention method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.json">json (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.json">json() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.json">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.json">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.json">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.json">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.json">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.json">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.json">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.json">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.json">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.json">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.json">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.json">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.json">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.json">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.json">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.json">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.json">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.json">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.json">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.json">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.json">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.json_object">json_object (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
</ul></td>
</tr></table>

<h2 id="K">K</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.kernel_size">kernel_size (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams">KeyValueCacheParams (class in tensorrt_llm.layers.attention)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.kv_cache_config">kv_cache_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.kv_cache_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo">kv_cache_quant_algo (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.kv_cache_type">kv_cache_type (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.kv_cache_type">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.kv_cache_type">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.kv_connector_config">kv_connector_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.kv_dtype">kv_dtype (tensorrt_llm.models.PretrainedConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.kv_transfer_timeout_ms">kv_transfer_timeout_ms (tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig">KvCacheConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.Config">KvCacheConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager">KVCacheManager (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig">KvCacheRetentionConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig">KvCacheRetentionConfig.TokenRangeRetentionConfig (class in tensorrt_llm.llmapi)</a>
</li>
</ul></td>
</tr></table>

<h2 id="L">L</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.LabelEmbedding">LabelEmbedding (class in tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.language_adapter_config">language_adapter_config (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.last_layer">last_layer (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB">LAST_PROCESS_FOR_UB (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.layer_norm">layer_norm() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.layer_quant_mode">layer_quant_mode (tensorrt_llm.llmapi.QuantConfig property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.layer_types">layer_types (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm">LayerNorm (class in tensorrt_llm.layers.normalization)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.LayerNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.layernorm_quantization_plugin">layernorm_quantization_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType">LayerNormPositionType (class in tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType">LayerNormType (class in tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.learned_absolute">learned_absolute (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.length">length (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id2">(tensorrt_llm.llmapi.CompletionOutput property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.length_penalty">length_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.length_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear">Linear (class in tensorrt_llm.layers.linear)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.linear">linear (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearActivation">LinearActivation (class in tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearApproximateGELU">LinearApproximateGELU (class in tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase">LinearBase (class in tensorrt_llm.layers.linear)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearGEGLU">LinearGEGLU (class in tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearGELU">LinearGELU (class in tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.LinearSwiGLU">LinearSwiGLU (class in tensorrt_llm.layers.mlp)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.ljust">ljust() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.ljust">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.ljust">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.ljust">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.llama3">llama3 (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig">LLaMAConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM">LLaMAForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel">LLaMAModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionConfig">LlavaNextVisionConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionWrapper">LlavaNextVisionWrapper (class in tensorrt_llm.models)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM">LLM (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.llm_engine_dir">llm_engine_dir (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.llm_id">llm_id (tensorrt_llm.llmapi.LLM attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id0">(tensorrt_llm.llmapi.LLM property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.llm_id">(tensorrt_llm.llmapi.MultimodalEncoder property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LlmArgs">LlmArgs (in module tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.load">load() (tensorrt_llm.models.PretrainedModel method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.load">(tensorrt_llm.models.SD3Transformer2DModel method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.load_balancer">load_balancer (tensorrt_llm.llmapi.MoeConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.load_format">load_format (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.load_format">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.load_format">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.load_format">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.load_format">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.load_format">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.load_format">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.load_format">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.load_format">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.load_format">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.load_format">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.load_test_audio">load_test_audio() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.load_test_data">load_test_data() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.locate_accepted_draft_tokens">locate_accepted_draft_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.location">location (tensorrt_llm.functional.Tensor property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.log">log() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.log">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.log_field_changes">log_field_changes() (tensorrt_llm.plugin.PluginConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.log_softmax">log_softmax() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.logits_processor">logits_processor (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessor">LogitsProcessor (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessorList">LogitsProcessorList (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.logprobs">logprobs (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.logprobs">(tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.logprobs_diff">logprobs_diff (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id3">(tensorrt_llm.llmapi.CompletionOutput property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.long_rope">long_rope (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.longrope">longrope (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.lookahead_config">lookahead_config (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.LOOKAHEAD_DECODING">LOOKAHEAD_DECODING (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig">LookaheadDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.Config">LookaheadDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.lora_ckpt_source">lora_ckpt_source (tensorrt_llm.llmapi.LoRARequest attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.lora_config">lora_config (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.lora_config">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.lora_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.lora_int_id">lora_int_id (tensorrt_llm.llmapi.LoRARequest attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.lora_name">lora_name (tensorrt_llm.llmapi.LoRARequest attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.lora_path">lora_path (tensorrt_llm.llmapi.LoRARequest attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.lora_plugin">lora_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_plugin">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lora_plugin">lora_plugin() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_target_modules">lora_target_modules (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest">LoRARequest (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.low_latency_gemm">low_latency_gemm() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.low_latency_gemm_plugin">low_latency_gemm_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.low_latency_gemm_swiglu">low_latency_gemm_swiglu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.low_latency_gemm_swiglu_plugin">low_latency_gemm_swiglu_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.lower">lower() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.lower">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.lower">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.lower">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.LOWPRECISION">LOWPRECISION (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.lstrip">lstrip() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.lstrip">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.lstrip">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.lstrip">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lt">lt() (in module tensorrt_llm.functional)</a>
</li>
</ul></td>
</tr></table>

<h2 id="M">M</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.make_causal_mask">make_causal_mask() (in module tensorrt_llm.layers.attention)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.maketrans">maketrans() (tensorrt_llm.llmapi.BatchingType static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.maketrans">(tensorrt_llm.llmapi.CapacitySchedulerPolicy static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.maketrans">(tensorrt_llm.llmapi.ContextChunkingPolicy static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.maketrans">(tensorrt_llm.llmapi.QuantAlgo static method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mamba_conv1d">mamba_conv1d() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.mamba_conv1d_plugin">mamba_conv1d_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_conv1d_plugin">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.mamba_ssm_cache_dtype">mamba_ssm_cache_dtype (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.mamba_ssm_cache_dtype">(tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM">MambaForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.manage_weights">manage_weights (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.mapping">mapping (tensorrt_llm.runtime.GenerationSession attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.mapping">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mark_output">mark_output() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_scatter">masked_scatter() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_select">masked_select() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.matmul">matmul() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.max">max() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.max">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.max_attention_window">max_attention_window (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_attention_window_size">max_attention_window_size (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_batch_size">max_batch_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.max_batch_size">(tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.max_batch_size">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_batch_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_batch_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_beam_width">max_beam_width (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.max_beam_width">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_beam_width">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_beam_width">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb">max_cache_storage_gb (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id14">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.max_concurrency">max_concurrency (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_concurrency">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.max_draft_len">max_draft_len (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_draft_len">(tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_draft_len">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_draft_tokens">max_draft_tokens (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len">max_encoder_input_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.max_gpu_total_bytes">max_gpu_total_bytes (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_input_len">max_input_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.max_input_len">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_input_len">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.max_matching_ngram_size">max_matching_ngram_size (tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_medusa_tokens">max_medusa_tokens (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_new_tokens">max_new_tokens (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size">max_ngram_size (tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.max_non_leaves_per_layer">max_non_leaves_per_layer (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.max_num_streams">max_num_streams (tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_num_tokens">max_num_tokens (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.max_num_tokens">(tensorrt_llm.llmapi.MoeConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.max_num_tokens">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_num_tokens">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_prompt_adapter_token">max_prompt_adapter_token (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size">max_prompt_embedding_table_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.max_records">max_records (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id15">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_seq_len">max_seq_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.max_seq_len">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.max_seq_len">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_sequence_length">max_sequence_length (tensorrt_llm.runtime.ModelRunner property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_sequence_length">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.max_tokens">max_tokens (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.max_tokens">(tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.max_tokens_in_buffer">max_tokens_in_buffer (tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.max_total_draft_tokens">max_total_draft_tokens (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_total_draft_tokens">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION">MAX_UTILIZATION (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size">max_verification_set_size (tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size">max_window_size (tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.maximum">maximum() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.maybe_to_pybind">maybe_to_pybind() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.maybe_to_pybind">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.maybe_to_pybind">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.maybe_to_pybind">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.maybe_to_pybind">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.maybe_to_pybind">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mean">mean() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mean">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.MEDUSA">MEDUSA (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices">medusa_choices (tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_decode_and_verify">medusa_decode_and_verify() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_paths">medusa_paths (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_position_offsets">medusa_position_offsets (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_temperature">medusa_temperature (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_topks">medusa_topks (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_tree_ids">medusa_tree_ids (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig">MedusaConfig (class in tensorrt_llm.models)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig">MedusaDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.Config">MedusaDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm">MedusaForCausalLm (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.meshgrid2d">meshgrid2d() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.metrics">metrics (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.min">min() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.MIN_LATENCY">MIN_LATENCY (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.min_length">min_length (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.min_p">min_p (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.min_p">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.min_tokens">min_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.minimum">minimum() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.mirror_pybind_enum">mirror_pybind_enum() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.mirror_pybind_enum">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.mirror_pybind_enum">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.mirror_pybind_enum">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.mirror_pybind_enum">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.mirror_pybind_enum">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.mirror_pybind_fields">mirror_pybind_fields() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.mirror_pybind_fields">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.mirror_pybind_fields">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.mirror_pybind_fields">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.mirror_pybind_fields">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.mirror_pybind_fields">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish">Mish (class in tensorrt_llm.layers.activation)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION">MIXED_PRECISION (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM">MLLaMAForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP">MLP (class in tensorrt_llm.layers.mlp)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.MLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType">MLPType (class in tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.mm_embedding_handle">mm_embedding_handle (tensorrt_llm.llmapi.RequestOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id8">(tensorrt_llm.llmapi.RequestOutput property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.mm_encoder_only">mm_encoder_only (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.MNNVL">MNNVL (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li>
MODEL
<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-arg-MODEL">trtllm-serve-mm_embedding_serve command line option</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-arg-MODEL">trtllm-serve-serve command line option</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.model">model (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.model">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_computed_fields">model_computed_fields (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_computed_fields">(tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_computed_fields">(tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_computed_fields">(tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_computed_fields">(tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_computed_fields">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_computed_fields">(tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_computed_fields">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_computed_fields">(tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_computed_fields">(tensorrt_llm.llmapi.MoeConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_computed_fields">(tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_computed_fields">(tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_computed_fields">(tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_computed_fields">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_config">model_config (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_config">(tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_config">(tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_config">(tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_config">(tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_config">(tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_config">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_config">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_config">(tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_config">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_config">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_config">(tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_config">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_config">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_config">(tensorrt_llm.llmapi.MoeConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_config">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_config">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_config">(tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_config">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_config">(tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_config">(tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_config">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_construct">model_construct() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_construct">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_construct">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_construct">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_construct">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_construct">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_construct">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_construct">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_construct">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_construct">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_construct">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_construct">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_construct">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_construct">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_construct">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_construct">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_construct">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_construct">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_construct">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_construct">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_construct">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_construct">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_copy">model_copy() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_copy">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_copy">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_copy">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_copy">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_copy">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_copy">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_copy">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_copy">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_copy">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_copy">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_copy">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_copy">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_copy">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_copy">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_copy">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_copy">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_copy">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_copy">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_copy">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_copy">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_copy">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_dump">model_dump() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_dump">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_dump">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_dump">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_dump">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_dump">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_dump">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_dump">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_dump">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_dump">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_dump">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_dump">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_dump">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_dump">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_dump">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_dump">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_dump">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_dump">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_dump">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_dump">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_dump">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_dump">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_dump_json">model_dump_json() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_dump_json">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_dump_json">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_dump_json">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_dump_json">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_dump_json">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_dump_json">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_dump_json">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_dump_json">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_dump_json">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_dump_json">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_dump_json">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_dump_json">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_dump_json">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_extra">model_extra (tensorrt_llm.llmapi.AttentionDpConfig property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_extra">(tensorrt_llm.llmapi.AutoDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_extra">(tensorrt_llm.llmapi.BuildConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_extra">(tensorrt_llm.llmapi.CacheTransceiverConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_extra">(tensorrt_llm.llmapi.CalibConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_extra">(tensorrt_llm.llmapi.CudaGraphConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_extra">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_extra">(tensorrt_llm.llmapi.DraftTargetDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_extra">(tensorrt_llm.llmapi.DynamicBatchConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_extra">(tensorrt_llm.llmapi.EagleDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_extra">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_extra">(tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_extra">(tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_extra">(tensorrt_llm.llmapi.MedusaDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_extra">(tensorrt_llm.llmapi.MoeConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_extra">(tensorrt_llm.llmapi.MTPDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_extra">(tensorrt_llm.llmapi.NGramDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_extra">(tensorrt_llm.llmapi.RocketSparseAttentionConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_extra">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_extra">(tensorrt_llm.llmapi.SchedulerConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_extra">(tensorrt_llm.llmapi.TorchCompileConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_extra">(tensorrt_llm.llmapi.UserProvidedDecodingConfig property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_fields">model_fields (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_fields">(tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_fields">(tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_fields">(tensorrt_llm.llmapi.CacheTransceiverConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_fields">(tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_fields">(tensorrt_llm.llmapi.CudaGraphConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_fields">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_fields">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_fields">(tensorrt_llm.llmapi.DynamicBatchConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_fields">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_fields">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_fields">(tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_fields">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_fields">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_fields">(tensorrt_llm.llmapi.MoeConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_fields">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_fields">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_fields">(tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_fields">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_fields">(tensorrt_llm.llmapi.SchedulerConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_fields">(tensorrt_llm.llmapi.TorchCompileConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_fields">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_fields_set">model_fields_set (tensorrt_llm.llmapi.AttentionDpConfig property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.AutoDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_fields_set">(tensorrt_llm.llmapi.BuildConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_fields_set">(tensorrt_llm.llmapi.CacheTransceiverConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_fields_set">(tensorrt_llm.llmapi.CalibConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_fields_set">(tensorrt_llm.llmapi.CudaGraphConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_fields_set">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.DraftTargetDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_fields_set">(tensorrt_llm.llmapi.DynamicBatchConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.EagleDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_fields_set">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_fields_set">(tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.MedusaDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_fields_set">(tensorrt_llm.llmapi.MoeConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.MTPDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.NGramDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_fields_set">(tensorrt_llm.llmapi.RocketSparseAttentionConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_fields_set">(tensorrt_llm.llmapi.SchedulerConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_fields_set">(tensorrt_llm.llmapi.TorchCompileConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_fields_set">(tensorrt_llm.llmapi.UserProvidedDecodingConfig property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.model_format">model_format (tensorrt_llm.llmapi.TorchLlmArgs property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.model_format">(tensorrt_llm.llmapi.TrtLlmArgs property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_json_schema">model_json_schema() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_json_schema">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_json_schema">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_json_schema">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_json_schema">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_json_schema">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_json_schema">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_json_schema">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_json_schema">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_json_schema">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_json_schema">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_json_schema">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_json_schema">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_json_schema">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.model_name">model_name (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_parametrized_name">model_parametrized_name() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_parametrized_name">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_parametrized_name">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_parametrized_name">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_parametrized_name">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_parametrized_name">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_parametrized_name">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_parametrized_name">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_parametrized_name">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_parametrized_name">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_parametrized_name">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_parametrized_name">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_parametrized_name">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_parametrized_name">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_post_init">model_post_init() (tensorrt_llm.llmapi.AttentionDpConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_post_init">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_post_init">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_post_init">(tensorrt_llm.llmapi.CacheTransceiverConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_post_init">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_post_init">(tensorrt_llm.llmapi.CudaGraphConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_post_init">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_post_init">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_post_init">(tensorrt_llm.llmapi.DynamicBatchConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_post_init">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_post_init">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_post_init">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_post_init">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_post_init">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_post_init">(tensorrt_llm.llmapi.MoeConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_post_init">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_post_init">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_post_init">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_post_init">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_post_init">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_post_init">(tensorrt_llm.llmapi.TorchCompileConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_post_init">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.model_post_init">(tensorrt_llm.plugin.PluginConfig method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_rebuild">model_rebuild() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_rebuild">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_rebuild">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_rebuild">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_rebuild">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_rebuild">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_rebuild">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_rebuild">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_rebuild">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_rebuild">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_rebuild">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_rebuild">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_rebuild">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_rebuild">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_validate">model_validate() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_validate">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_validate">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_validate">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_validate">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_validate">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_validate">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_validate">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_validate">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_validate">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_validate">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_validate">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_validate">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_validate_json">model_validate_json() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_validate_json">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate_json">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_validate_json">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_validate_json">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate_json">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_validate_json">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate_json">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_validate_json">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_validate_json">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate_json">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_validate_json">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_validate_json">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate_json">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.model_validate_strings">model_validate_strings() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.model_validate_strings">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate_strings">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.model_validate_strings">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.model_validate_strings">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate_strings">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.model_validate_strings">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate_strings">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.model_validate_strings">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.model_validate_strings">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate_strings">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.model_validate_strings">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.model_validate_strings">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate_strings">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig">ModelConfig (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner">ModelRunner (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp">ModelRunnerCpp (class in tensorrt_llm.runtime)</a>
</li>
<li>
module

<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#module-tensorrt_llm">tensorrt_llm</a>, <a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="legacy/python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="legacy/python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="legacy/python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="legacy/python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">tensorrt_llm.functional</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">tensorrt_llm.layers.activation</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">tensorrt_llm.layers.attention</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">tensorrt_llm.layers.cast</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">tensorrt_llm.layers.conv</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">tensorrt_llm.layers.embedding</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">tensorrt_llm.layers.linear</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">tensorrt_llm.layers.mlp</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">tensorrt_llm.layers.normalization</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">tensorrt_llm.layers.pooling</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">tensorrt_llm.models</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">tensorrt_llm.plugin</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">tensorrt_llm.quantization</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">tensorrt_llm.runtime</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.modulo">modulo() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType.moe">moe (tensorrt_llm.functional.SideStreamIDType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.moe_cluster_parallel_size">moe_cluster_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.moe_cluster_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.moe_config">moe_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.moe_expert_parallel_size">moe_expert_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.moe_expert_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.MOE_FINALIZE_ALLREDUCE_RESIDUAL_RMS_NORM">MOE_FINALIZE_ALLREDUCE_RESIDUAL_RMS_NORM (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.moe_plugin">moe_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.moe_tensor_parallel_size">moe_tensor_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.moe_tensor_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MoEAllReduceParams">MoEAllReduceParams (class in tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig">MoeConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.Config">MoeConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.monitor_memory">monitor_memory (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.mpi_session">mpi_session (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.mpi_session">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession">MpiCommSession (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM">MPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel">MPTModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.mrope">mrope (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.mrope">(tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.MropeParams">MropeParams (class in tensorrt_llm.layers.attention)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.msg">msg (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.msg">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.mtp_eagle_one_model">mtp_eagle_one_model (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig">MTPDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.Config">MTPDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mul">mul() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.multi_block_mode">multi_block_mode (tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.multimodal_embedding_handles">multimodal_embedding_handles (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.multimodal_hashes">multimodal_hashes (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder">MultimodalEncoder (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner">MultimodalModelRunner (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.multiple_profiles">multiple_profiles (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.multiply_and_lora">multiply_and_lora() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.multiply_collect">multiply_collect() (tensorrt_llm.layers.linear.LinearBase method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.multiply_collect">(tensorrt_llm.layers.linear.RowLinear method)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="N">N</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.n">n (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.name">name (tensorrt_llm.functional.Tensor property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.name">(tensorrt_llm.llmapi.LoRARequest property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.name">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.NATIVE_QUANT_FLOW">NATIVE_QUANT_FLOW (tensorrt_llm.models.GemmaForCausalLM attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.NCCL">NCCL (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.nccl_plugin">nccl_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.NCCL_SYMMETRIC">NCCL_SYMMETRIC (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.ndim">ndim() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.network">network (tensorrt_llm.functional.Tensor property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids">next_medusa_input_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.NGRAM">NGRAM (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig">NGramDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.Config">NGramDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.NO_QUANT">NO_QUANT (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size">no_repeat_ngram_size (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.no_repeat_ngram_size">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.non_gated_version">non_gated_version() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.NONE">NONE (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.none">none (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.NONE">NONE (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.nonzero">nonzero() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.norm_quant_fusion">norm_quant_fusion (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.normalize_log_probs">normalize_log_probs (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.not_op">not_op() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.num_beams">num_beams (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.num_capture_layers">num_capture_layers (tensorrt_llm.llmapi.EagleDecodingConfig property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.num_capture_layers">(tensorrt_llm.llmapi.MTPDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.num_capture_layers">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_draft_tokens">num_draft_tokens (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.num_eagle_layers">num_eagle_layers (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_heads">num_heads (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_heads">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_heads">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads">num_kv_heads (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_cross_attn_layer">num_kv_heads_per_cross_attn_layer (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_layer">num_kv_heads_per_layer (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_layers">num_layers (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_layers">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_layers">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_layers">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads">num_medusa_heads (tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_medusa_heads">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_medusa_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.num_nextn_predict_layers">num_nextn_predict_layers (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.num_nextn_predict_layers_from_model_config">num_nextn_predict_layers_from_model_config (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.num_postprocess_workers">num_postprocess_workers (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.num_postprocess_workers">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.num_return_sequences">num_return_sequences (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.numel">numel() (tensorrt_llm.runtime.TensorInfo method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.NVFP4">NVFP4 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv48nvinfer1">nvinfer1 (C++ type)</a>
</li>
</ul></td>
</tr></table>

<h2 id="O">O</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks">onboard_blocks (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.ONESHOT">ONESHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_and">op_and() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_or">op_or() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_xor">op_xor() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.opaque_state">opaque_state (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.opt_batch_size">opt_batch_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.opt_num_tokens">opt_num_tokens (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM">OPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel">OPTModel (class in tensorrt_llm.models)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.orchestrator_type">orchestrator_type (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.orchestrator_type">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.otlp_traces_endpoint">otlp_traces_endpoint (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.otlp_traces_endpoint">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.outer">outer() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs">output_cum_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.output_directory">output_directory (tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_log_probs">output_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_sequence_lengths">output_sequence_lengths (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.output_timing_cache">output_timing_cache (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.outputs">outputs (tensorrt_llm.llmapi.RequestOutput attribute)</a>
<ul>
<li><a href="llm-api/reference.html#id9">(tensorrt_llm.llmapi.RequestOutput property)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="P">P</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.pad">pad() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.pad_id">pad_id (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.pad_id">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.padding">padding (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.page_size">page_size (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.paged_kv_cache">paged_kv_cache (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_kv_cache">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.paged_state">paged_state (tensorrt_llm.plugin.PluginConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_state">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.paged_state">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.parallel_config">parallel_config (tensorrt_llm.llmapi.TorchLlmArgs property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.parallel_config">(tensorrt_llm.llmapi.TrtLlmArgs property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.params_imply_greedy_decoding">params_imply_greedy_decoding() (tensorrt_llm.llmapi.SamplingParams static method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.parse_file">parse_file() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.parse_file">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.parse_file">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.parse_file">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.parse_file">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.parse_file">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_file">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_file">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.parse_file">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.parse_file">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_file">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.parse_file">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_file">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.parse_file">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.parse_file">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.parse_file">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.parse_file">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_file">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_file">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.parse_file">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.parse_file">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_file">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.parse_obj">parse_obj() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.parse_obj">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.parse_obj">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.parse_obj">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.parse_obj">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.parse_obj">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_obj">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_obj">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.parse_obj">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.parse_obj">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_obj">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.parse_obj">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_obj">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.parse_obj">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.parse_obj">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.parse_obj">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.parse_obj">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_obj">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_obj">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.parse_obj">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.parse_obj">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_obj">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.parse_raw">parse_raw() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.parse_raw">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.parse_raw">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.parse_raw">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.parse_raw">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.parse_raw">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_raw">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_raw">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.parse_raw">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.parse_raw">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_raw">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.parse_raw">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_raw">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.parse_raw">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.parse_raw">(tensorrt_llm.llmapi.MoeConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.parse_raw">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.parse_raw">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_raw">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_raw">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.parse_raw">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.parse_raw">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_raw">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.partition">partition() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.partition">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.partition">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.partition">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LoRARequest.path">path (tensorrt_llm.llmapi.LoRARequest property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.peft_cache_config">peft_cache_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.peft_cache_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.perf_metrics_max_requests">perf_metrics_max_requests (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.permute">permute() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.permute">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM">Phi3ForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3Model">Phi3Model (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM">PhiForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel">PhiModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.pipeline_parallel_size">pipeline_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.pipeline_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PixArtAlphaTextProjection">PixArtAlphaTextProjection (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.plugin_config">plugin_config (tensorrt_llm.llmapi.BuildConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType">PositionEmbeddingType (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.post_layernorm">post_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.posterior_threshold">posterior_threshold (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.postproc_params">postproc_params (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.postprocess">postprocess() (tensorrt_llm.layers.attention.Attention method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention.postprocess">(tensorrt_llm.layers.attention.DeepseekV2Attention method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.postprocess">(tensorrt_llm.layers.embedding.Embedding method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.postprocess">(tensorrt_llm.layers.linear.Linear method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.postprocess_tokenizer_dir">postprocess_tokenizer_dir (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.postprocess_tokenizer_dir">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.pow">pow() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids">pp_communicate_final_output_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens">pp_communicate_new_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.pp_reduce_scatter">pp_reduce_scatter (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.pre_layernorm">pre_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.pre_quant_scale">pre_quant_scale (tensorrt_llm.llmapi.QuantConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.precompute_relative_attention_bias">precompute_relative_attention_bias() (tensorrt_llm.models.DecoderModel method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.precompute_relative_attention_bias">(tensorrt_llm.models.EncoderModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.precompute_relative_attention_bias">(tensorrt_llm.models.WhisperEncoder method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs">prepare_inputs() (tensorrt_llm.models.ChatGLMForCausalLM method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.prepare_inputs">(tensorrt_llm.models.DecoderModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.prepare_inputs">(tensorrt_llm.models.DiT method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.prepare_inputs">(tensorrt_llm.models.EagleForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.prepare_inputs">(tensorrt_llm.models.EncoderModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionWrapper.prepare_inputs">(tensorrt_llm.models.LlavaNextVisionWrapper method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.prepare_inputs">(tensorrt_llm.models.MambaForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM.prepare_inputs">(tensorrt_llm.models.MLLaMAForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.prepare_inputs">(tensorrt_llm.models.PretrainedModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_inputs">(tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.prepare_inputs">(tensorrt_llm.models.SD3Transformer2DModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.prepare_inputs">(tensorrt_llm.models.WhisperEncoder method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.prepare_position_ids_for_cogvlm">prepare_position_ids_for_cogvlm() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_recurrent_inputs">prepare_recurrent_inputs() (tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.preprocess">preprocess() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.presence_penalty">presence_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.presence_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig">PretrainedConfig (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel">PretrainedModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.print_iter_log">print_iter_log (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.priority">priority (tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.process_input">process_input() (tensorrt_llm.runtime.EncDecModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.process_logits_including_draft">process_logits_including_draft() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.prod">prod() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.profiler">profiler (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.profiling_verbosity">profiling_verbosity (tensorrt_llm.llmapi.BuildConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.prompt">prompt (tensorrt_llm.llmapi.RequestOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id10">(tensorrt_llm.llmapi.RequestOutput property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.prompt_budget">prompt_budget (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.prompt_ignore_length">prompt_ignore_length (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.prompt_ignore_length">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.prompt_logprobs">prompt_logprobs (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.prompt_logprobs">(tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.prompt_token_ids">prompt_token_ids (tensorrt_llm.llmapi.RequestOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id11">(tensorrt_llm.llmapi.RequestOutput property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding">PromptTuningEmbedding (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup">ptuning_setup() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_fuyu">ptuning_setup_fuyu() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_llava_next">ptuning_setup_llava_next() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_phi3">ptuning_setup_phi3() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_pixtral">ptuning_setup_pixtral() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.pybind_equals">pybind_equals() (tensorrt_llm.llmapi.CacheTransceiverConfig static method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.pybind_equals">(tensorrt_llm.llmapi.DynamicBatchConfig static method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.pybind_equals">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig static method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.pybind_equals">(tensorrt_llm.llmapi.KvCacheConfig static method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.pybind_equals">(tensorrt_llm.llmapi.LookaheadDecodingConfig static method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.pybind_equals">(tensorrt_llm.llmapi.SchedulerConfig static method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.python_e2e">python_e2e (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="Q">Q</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.qserve_gemm_plugin">qserve_gemm_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.quant_algo">quant_algo (tensorrt_llm.llmapi.QuantConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.quant_algo">(tensorrt_llm.models.PretrainedConfig property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.quant_config">quant_config (tensorrt_llm.llmapi.TorchLlmArgs property)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.quant_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.quant_mode">quant_mode (tensorrt_llm.llmapi.QuantConfig property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.quant_mode">(tensorrt_llm.models.PretrainedConfig property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.quant_mode">(tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.quant_mode">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo">QuantAlgo (class in tensorrt_llm.llmapi)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo">(class in tensorrt_llm.quantization)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig">QuantConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.quantize">quantize() (tensorrt_llm.models.BaichuanForCausalLM class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.quantize">(tensorrt_llm.models.ChatGLMForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.quantize">(tensorrt_llm.models.CogVLMForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.quantize">(tensorrt_llm.models.GemmaForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.quantize">(tensorrt_llm.models.GPTForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.quantize">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.quantize">(tensorrt_llm.models.PretrainedModel class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.quantize_and_export">quantize_and_export() (in module tensorrt_llm.quantization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.quantize_per_token_plugin">quantize_per_token_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.quantize_tensor_plugin">quantize_tensor_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode">QuantMode (class in tensorrt_llm.quantization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.quick_gelu">quick_gelu() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession">QWenForCausalLMGenerationSession (class in tensorrt_llm.runtime)</a>
|
||
</li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="R">R</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rand">rand() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.random_seed">random_seed (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.random_seed">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.rank">rank() (tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.ray_worker_extension_cls">ray_worker_extension_cls (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rearrange">rearrange() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.reasoning_parser">reasoning_parser (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.reasoning_parser">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.record_stats">record_stats() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM">RecurrentGemmaForCausalLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.recv">recv() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.redrafter_draft_len_per_beam">redrafter_draft_len_per_beam (tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.redrafter_num_beams">redrafter_num_beams (tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ReDrafterForLLaMALM">ReDrafterForLLaMALM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ReDrafterForQWenLM">ReDrafterForQWenLM (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.reduce">reduce() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.reduce_fusion">reduce_fusion (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.reduce_scatter">reduce_scatter() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.regex">regex (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.relative">relative (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.relaxed_delta">relaxed_delta (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.relaxed_topk">relaxed_topk (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.release">release() (tensorrt_llm.models.PretrainedModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.relu">relu() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.remove_input_padding">remove_input_padding (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.remove_input_padding">(tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.remove_input_padding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.remove_input_padding">(tensorrt_llm.runtime.ModelRunner property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.remove_input_padding">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.removeprefix">removeprefix() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.removeprefix">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.removeprefix">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.removeprefix">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.removesuffix">removesuffix() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.removesuffix">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.removesuffix">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.removesuffix">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.reorder_kv_cache_for_beam_search">reorder_kv_cache_for_beam_search() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.repeat">repeat() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.repeat">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.repeat_interleave">repeat_interleave() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.repetition_penalty">repetition_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.repetition_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.replace">replace() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.replace">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.replace">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.replace">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.replace_all_uses_with">replace_all_uses_with() (tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.request_id">request_id (tensorrt_llm.llmapi.RequestOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id12">(tensorrt_llm.llmapi.RequestOutput property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.request_perf_metrics">request_perf_metrics (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.request_perf_metrics">(tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.request_stats_max_iterations">request_stats_max_iterations (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.request_stats_max_iterations">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DisaggregatedParams.request_type">request_type (tensorrt_llm.llmapi.DisaggregatedParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError">RequestError (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput">RequestOutput (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker">RequestOutput.PostprocWorker (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input">RequestOutput.PostprocWorker.Input (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output">RequestOutput.PostprocWorker.Output (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.res">res (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM">RESIDUAL_RMS_NORM (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_FP8">RESIDUAL_RMS_NORM_OUT_QUANT_FP8 (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4">RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4 (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8">RESIDUAL_RMS_NORM_QUANT_FP8 (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4">RESIDUAL_RMS_NORM_QUANT_NVFP4 (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_PREPOST_NORM">RESIDUAL_RMS_PREPOST_NORM (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.resource_manager">resource_manager (tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.result">result() (tensorrt_llm.llmapi.RequestOutput method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_context_logits">return_context_logits (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.return_dict">return_dict (tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_encoder_output">return_encoder_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_generation_logits">return_generation_logits (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_perf_metrics">return_perf_metrics (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.return_perf_metrics">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.return_perf_metrics">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.revision">revision (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.revision">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rfind">rfind() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rfind">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rfind">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rfind">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rg_lru">rg_lru() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rindex">rindex() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rindex">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rindex">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rindex">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rjust">rjust() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rjust">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rjust">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rjust">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rms_norm">rms_norm() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm">RmsNorm (class in tensorrt_llm.layers.normalization)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.RmsNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.rmsnorm_quantization_plugin">rmsnorm_quantization_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_conv_dim_size">rnn_conv_dim_size (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_conv_dim_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_head_size">rnn_head_size (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_head_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_hidden_size">rnn_hidden_size (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_hidden_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaForQuestionAnswering">RobertaForQuestionAnswering (in module tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaForSequenceClassification">RobertaForSequenceClassification (in module tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaModel">RobertaModel (in module tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig">RocketSparseAttentionConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.Config">RocketSparseAttentionConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox">rope_gpt_neox (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gptj">rope_gptj (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils">RopeEmbeddingUtils (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType">RotaryScalingType (class in tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two">rotate_every_two() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half">rotate_half() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.round">round() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear">RowLinear (class in tensorrt_llm.layers.linear)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rpartition">rpartition() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rpartition">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rpartition">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rpartition">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.rsp">rsp (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rsplit">rsplit() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rsplit">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rsplit">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rsplit">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.rstrip">rstrip() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.rstrip">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.rstrip">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.rstrip">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.run">run() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.run">(tensorrt_llm.runtime.Session method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.runtime">runtime (tensorrt_llm.runtime.GenerationSession attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.runtime">(tensorrt_llm.runtime.Session property)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
</tr></table>
|
||
|
||
<h2 id="S">S</h2>
|
||
<table style="width: 100%" class="indextable genindextable"><tr>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.sampler_type">sampler_type (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.sampling_params">sampling_params (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig">SamplingConfig (class in tensorrt_llm.runtime)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams">SamplingParams (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LlavaNextVisionWrapper.save_checkpoint">save_checkpoint() (tensorrt_llm.models.LlavaNextVisionWrapper method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.save_checkpoint">(tensorrt_llm.models.PretrainedModel method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.SAVE_HIDDEN_STATES">SAVE_HIDDEN_STATES (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig">SaveHiddenStatesDecodingConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.Config">SaveHiddenStatesDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.scatter">scatter() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.scatter_nd">scatter_nd() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.scheduler_config">scheduler_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.scheduler_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig">SchedulerConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.Config">SchedulerConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.schema">schema() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.schema">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.schema">(tensorrt_llm.llmapi.BuildConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.schema">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.schema">(tensorrt_llm.llmapi.CalibConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.schema">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.schema">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.schema">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.schema">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.schema">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.schema">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.schema">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.schema">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.schema">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.schema">(tensorrt_llm.llmapi.MoeConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.schema">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.schema">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.schema">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.schema">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.schema">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.schema">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.schema">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.schema_json">schema_json() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.schema_json">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.schema_json">(tensorrt_llm.llmapi.BuildConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.schema_json">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.schema_json">(tensorrt_llm.llmapi.CalibConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.schema_json">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.schema_json">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.schema_json">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.schema_json">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.schema_json">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.schema_json">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.schema_json">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.schema_json">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.schema_json">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.schema_json">(tensorrt_llm.llmapi.MoeConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.schema_json">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.schema_json">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.schema_json">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.schema_json">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.schema_json">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.schema_json">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.schema_json">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX">SD35AdaLayerNormZeroX (class in tensorrt_llm.layers.normalization)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.SD3PatchEmbed">SD3PatchEmbed (class in tensorrt_llm.layers.embedding)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel">SD3Transformer2DModel (class in tensorrt_llm.models)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority">secondary_offload_min_priority (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.seed">seed (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.select">select() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.select">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.selective_scan">selective_scan() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.send">send() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.serialize_engine">serialize_engine() (tensorrt_llm.runtime.ModelRunner method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session">Session (class in tensorrt_llm.runtime)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.set_attn_processor">set_attn_processor() (tensorrt_llm.models.SD3Transformer2DModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_context_fmha">set_context_fmha() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.set_default_max_input_len">set_default_max_input_len() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.set_default_max_input_len">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_dora_plugin">set_dora_plugin() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_fp8_rowwise_quant_plugins">set_fp8_rowwise_quant_plugins() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="_cpp_gen/runtime.html#c.SET_FROM_OPTIONAL">SET_FROM_OPTIONAL (C macro)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_if_not_exist">set_if_not_exist() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_lora_plugin">set_lora_plugin() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_nccl_plugin">set_nccl_plugin() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_qserve_plugins">set_qserve_plugins() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_rank">set_rank() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.set_rel_attn_table">set_rel_attn_table() (tensorrt_llm.layers.attention.Attention method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.set_runtime_knobs_from_build_config">set_runtime_knobs_from_build_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.set_runtime_knobs_from_build_config">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.set_shapes">set_shapes() (tensorrt_llm.runtime.Session method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.set_smooth_quant_plugins">set_smooth_quant_plugins() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.setup">setup() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.setup_embedding_parallel_mode">setup_embedding_parallel_mode() (tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts">setup_fake_prompts() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_qwen2vl">setup_fake_prompts_qwen2vl() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_vila">setup_fake_prompts_vila() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_inputs">setup_inputs() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.shape">shape (tensorrt_llm.functional.Tensor property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.shape">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.shape">shape() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.shutdown">shutdown() (tensorrt_llm.llmapi.LLM method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.shutdown">(tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.shutdown">(tensorrt_llm.llmapi.MultimodalEncoder method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.shutdown_abort">shutdown_abort() (tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType">SideStreamIDType (class in tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sigmoid">sigmoid() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.silu">silu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sin">sin() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.sink_token_length">sink_token_length (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.sink_token_length">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.size">size (tensorrt_llm.functional.SliceInputType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.size">size() (tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.skip_cross_attn_blocks">skip_cross_attn_blocks (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.skip_cross_kv">skip_cross_kv (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.skip_special_tokens">skip_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.skip_tokenizer_init">skip_tokenizer_init (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.skip_tokenizer_init">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.slice">slice() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType">SliceInputType (class in tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.sliding_window_causal">sliding_window_causal (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.smooth_quant_gemm_plugin">smooth_quant_gemm_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.smooth_quant_plugins">smooth_quant_plugins (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.smoothquant_val">smoothquant_val (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softmax">softmax() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softplus">softplus() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens">spaces_between_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.sparse_attention_config">sparse_attention_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.sparse_attention_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.spec_dec_mode">spec_dec_mode (tensorrt_llm.llmapi.AutoDecodingConfig property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.DraftTargetDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.EagleDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.MedusaDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.MTPDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.NGramDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.spec_dec_mode">(tensorrt_llm.llmapi.UserProvidedDecodingConfig property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.SpecDecodingParams">SpecDecodingParams (class in tensorrt_llm.layers.attention)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.speculative_config">speculative_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.speculative_config">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode">speculative_decoding_mode (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.speculative_model_dir">speculative_model_dir (tensorrt_llm.llmapi.AutoDecodingConfig attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.DraftTargetDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.LookaheadDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.NGramDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.speculative_model_dir">(tensorrt_llm.llmapi.TorchLlmArgs property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.speculative_model_dir">(tensorrt_llm.llmapi.TrtLlmArgs property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.speculative_model_dir">(tensorrt_llm.llmapi.UserProvidedDecodingConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.speculative_model_format">speculative_model_format (tensorrt_llm.llmapi.TorchLlmArgs property)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.speculative_model_format">(tensorrt_llm.llmapi.TrtLlmArgs property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode">SpeculativeDecodingMode (class in tensorrt_llm.models)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.split">split() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.split">(tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.split">(tensorrt_llm.llmapi.BatchingType method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.split">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.split">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.split">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.split_prompt_by_images">split_prompt_by_images() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.splitlines">splitlines() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.splitlines">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.splitlines">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.splitlines">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sqrt">sqrt() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.sqrt">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.squared_relu">squared_relu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.squeeze">squeeze() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.squeeze">(tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.squeeze">(tensorrt_llm.runtime.TensorInfo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.stack">stack() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.start">start (tensorrt_llm.functional.SliceInputType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.start">start() (tensorrt_llm.llmapi.RequestOutput.PostprocWorker method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.startswith">startswith() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.startswith">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.startswith">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.startswith">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.state_dtype">state_dtype (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.state_dtype">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.state_size">state_size (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.state_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.STATIC">STATIC (tensorrt_llm.llmapi.BatchingType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH">STATIC_BATCH (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.step">step() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.stop">stop (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.stop_reason">stop_reason (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.stop_token_ids">stop_token_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.stop_words_list">stop_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteria">StoppingCriteria (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteriaList">StoppingCriteriaList (class in tensorrt_llm.runtime)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.stream_interval">stream_interval (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.streaming">streaming (tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.streamingllm">streamingllm (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SliceInputType.stride">stride (tensorrt_llm.functional.SliceInputType attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.strip">strip() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.strip">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.strip">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.strip">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.strongly_typed">strongly_typed (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.structural_tag">structural_tag (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sub">sub() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.submit">submit() (tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MpiCommSession.submit_sync">submit_sync() (tensorrt_llm.llmapi.MpiCommSession method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sum">sum() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.supports_backend">supports_backend() (tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.supports_backend">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.supports_backend">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.supports_backend">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.supports_backend">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.supports_backend">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.supports_backend">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.supports_backend">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.supports_backend">(tensorrt_llm.llmapi.RocketSparseAttentionConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.supports_backend">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.supports_backend">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.swapcase">swapcase() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.swapcase">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.swapcase">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.swapcase">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.swiglu">swiglu() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.sync_quant_config_with_kv_cache_config_dtype">sync_quant_config_with_kv_cache_config_dtype() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
</ul></td>
</tr></table>

<h2 id="T">T</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.tanh">tanh() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.temperature">temperature (tensorrt_llm.llmapi.SamplingParams attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.temperature">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor">Tensor (class in tensorrt_llm.functional)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.tensor_parallel_size">tensor_parallel_size (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.tensor_parallel_size">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo">TensorInfo (class in tensorrt_llm.runtime)</a>
</li>
<li>
tensorrt_llm
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#module-tensorrt_llm">module</a>, <a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="legacy/python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="legacy/python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="legacy/python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="legacy/python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
</ul></li>
<li><a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">tensorrt_llm (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[25]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[28]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[29]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[30]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[31]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[32]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[33]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[34]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[35]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[36]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[37]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[38]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[39]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[40]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[41]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[42]</a>
</li>
<li>
tensorrt_llm.functional
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.activation
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.attention
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.cast
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.conv
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.embedding
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.linear
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.mlp
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.normalization
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.layers.pooling
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.models
<ul>
<li><a href="legacy/python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.plugin
<ul>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.quantization
<ul>
<li><a href="legacy/python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">module</a>
</li>
</ul></li>
<li>
tensorrt_llm.runtime
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">module</a>
</li>
</ul></li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm13batch_managerE">tensorrt_llm::batch_manager (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[4]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE">tensorrt_llm::batch_manager::kv_cache_manager (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">tensorrt_llm::executor (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[7]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21AdditionalModelOutputE">tensorrt_llm::executor::AdditionalModelOutput (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput21AdditionalModelOutputENSt6stringEb">tensorrt_llm::executor::AdditionalModelOutput::AdditionalModelOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput13gatherContextE">tensorrt_llm::executor::AdditionalModelOutput::gatherContext (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput4nameE">tensorrt_llm::executor::AdditionalModelOutput::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor21AdditionalModelOutputeqERK21AdditionalModelOutput">tensorrt_llm::executor::AdditionalModelOutput::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutputE">tensorrt_llm::executor::AdditionalOutput (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputENSt6stringE6Tensor">tensorrt_llm::executor::AdditionalOutput::AdditionalOutput (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputERK16AdditionalOutput">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputERR16AdditionalOutput">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutput4nameE">tensorrt_llm::executor::AdditionalOutput::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutputaSERK16AdditionalOutput">tensorrt_llm::executor::AdditionalOutput::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutputaSERR16AdditionalOutput">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutput6outputE">tensorrt_llm::executor::AdditionalOutput::output (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16AdditionalOutputD0Ev">tensorrt_llm::executor::AdditionalOutput::~AdditionalOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingTypeE">tensorrt_llm::executor::BatchingType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE">tensorrt_llm::executor::BatchingType::kINFLIGHT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE">tensorrt_llm::executor::BatchingType::kSTATIC (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10BeamTokensE">tensorrt_llm::executor::BeamTokens (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10BufferViewE">tensorrt_llm::executor::BufferView (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15CacheSaltIDTypeE">tensorrt_llm::executor::CacheSaltIDType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfigE">tensorrt_llm::executor::CacheTransceiverConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendTypeE">tensorrt_llm::executor::CacheTransceiverConfig::BackendType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType7DEFAULTE">tensorrt_llm::executor::CacheTransceiverConfig::BackendType::DEFAULT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType3MPIE">tensorrt_llm::executor::CacheTransceiverConfig::BackendType::MPI (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType4NIXLE">tensorrt_llm::executor::CacheTransceiverConfig::BackendType::NIXL (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType3UCXE">tensorrt_llm::executor::CacheTransceiverConfig::BackendType::UCX (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig22CacheTransceiverConfigENSt8optionalI11BackendTypeEENSt8optionalI6size_tEENSt8optionalIiEE">tensorrt_llm::executor::CacheTransceiverConfig::CacheTransceiverConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig14getBackendTypeEv">tensorrt_llm::executor::CacheTransceiverConfig::getBackendType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig22getKvTransferTimeoutMsEv">tensorrt_llm::executor::CacheTransceiverConfig::getKvTransferTimeoutMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig20getMaxTokensInBufferEv">tensorrt_llm::executor::CacheTransceiverConfig::getMaxTokensInBuffer (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig12mBackendTypeE">tensorrt_llm::executor::CacheTransceiverConfig::mBackendType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig20mKvTransferTimeoutMsE">tensorrt_llm::executor::CacheTransceiverConfig::mKvTransferTimeoutMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig18mMaxTokensInBufferE">tensorrt_llm::executor::CacheTransceiverConfig::mMaxTokensInBuffer (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfigeqERK22CacheTransceiverConfig">tensorrt_llm::executor::CacheTransceiverConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig14setBackendTypeENSt8optionalI11BackendTypeEE">tensorrt_llm::executor::CacheTransceiverConfig::setBackendType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig22setKvTransferTimeoutMsENSt8optionalIiEE">tensorrt_llm::executor::CacheTransceiverConfig::setKvTransferTimeoutMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig20setMaxTokensInBufferENSt8optionalI6size_tEE">tensorrt_llm::executor::CacheTransceiverConfig::setMaxTokensInBuffer (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicyE">tensorrt_llm::executor::CapacitySchedulerPolicy (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy20kGUARANTEED_NO_EVICTE">tensorrt_llm::executor::CapacitySchedulerPolicy::kGUARANTEED_NO_EVICT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy16kMAX_UTILIZATIONE">tensorrt_llm::executor::CapacitySchedulerPolicy::kMAX_UTILIZATION (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy13kSTATIC_BATCHE">tensorrt_llm::executor::CapacitySchedulerPolicy::kSTATIC_BATCH (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationModeE">tensorrt_llm::executor::CommunicationMode (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE">tensorrt_llm::executor::CommunicationMode::kLEADER (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationMode13kORCHESTRATORE">tensorrt_llm::executor::CommunicationMode::kORCHESTRATOR (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationTypeE">tensorrt_llm::executor::CommunicationType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE">tensorrt_llm::executor::CommunicationType::kMPI (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicyE">tensorrt_llm::executor::ContextChunkingPolicy (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy15kEQUAL_PROGRESSE">tensorrt_llm::executor::ContextChunkingPolicy::kEQUAL_PROGRESS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy24kFIRST_COME_FIRST_SERVEDE">tensorrt_llm::executor::ContextChunkingPolicy::kFIRST_COME_FIRST_SERVED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsE">tensorrt_llm::executor::ContextPhaseParams (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypeNSt8optionalI9VecTokensEE">tensorrt_llm::executor::ContextPhaseParams::ContextPhaseParams (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypePvNSt8optionalI9VecTokensEE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypeRKNSt6vectorIcEENSt8optionalI9VecTokensEE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERK18ContextPhaseParams">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERR18ContextPhaseParams">[4]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams7deleterEPKv">tensorrt_llm::executor::ContextPhaseParams::deleter (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NKR12tensorrt_llm8executor18ContextPhaseParams14getDraftTokensEv">tensorrt_llm::executor::ContextPhaseParams::getDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NKR12tensorrt_llm8executor18ContextPhaseParams17getFirstGenTokensEv">tensorrt_llm::executor::ContextPhaseParams::getFirstGenTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getReqIdEv">tensorrt_llm::executor::ContextPhaseParams::getReqId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams18getSerializedStateEv">tensorrt_llm::executor::ContextPhaseParams::getSerializedState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8getStateEv">tensorrt_llm::executor::ContextPhaseParams::getState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getStateEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams12mDraftTokensE">tensorrt_llm::executor::ContextPhaseParams::mDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams15mFirstGenTokensE">tensorrt_llm::executor::ContextPhaseParams::mFirstGenTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mReqIdE">tensorrt_llm::executor::ContextPhaseParams::mReqId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mStateE">tensorrt_llm::executor::ContextPhaseParams::mState (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERK18ContextPhaseParams">tensorrt_llm::executor::ContextPhaseParams::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERR18ContextPhaseParams">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParamseqERK18ContextPhaseParams">tensorrt_llm::executor::ContextPhaseParams::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NO12tensorrt_llm8executor18ContextPhaseParams17popFirstGenTokensEv">tensorrt_llm::executor::ContextPhaseParams::popFirstGenTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams12releaseStateEv">tensorrt_llm::executor::ContextPhaseParams::releaseState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams13RequestIdTypeE">tensorrt_llm::executor::ContextPhaseParams::RequestIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8StatePtrE">tensorrt_llm::executor::ContextPhaseParams::StatePtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsD0Ev">tensorrt_llm::executor::ContextPhaseParams::~ContextPhaseParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverStateE">tensorrt_llm::executor::DataTransceiverState (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState20DataTransceiverStateEN8kv_cache10CacheStateEN8kv_cache9CommStateE">tensorrt_llm::executor::DataTransceiverState::DataTransceiverState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState20DataTransceiverStateEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20DataTransceiverState13getCacheStateEv">tensorrt_llm::executor::DataTransceiverState::getCacheState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20DataTransceiverState12getCommStateEv">tensorrt_llm::executor::DataTransceiverState::getCommState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState11mCacheStateE">tensorrt_llm::executor::DataTransceiverState::mCacheState (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState10mCommStateE">tensorrt_llm::executor::DataTransceiverState::mCommState (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20DataTransceiverStateeqERK20DataTransceiverState">tensorrt_llm::executor::DataTransceiverState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState13setCacheStateEN8kv_cache10CacheStateE">tensorrt_llm::executor::DataTransceiverState::setCacheState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20DataTransceiverState12setCommStateEN8kv_cache9CommStateE">tensorrt_llm::executor::DataTransceiverState::setCommState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20DataTransceiverState8toStringEv">tensorrt_llm::executor::DataTransceiverState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataTypeE">tensorrt_llm::executor::DataType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBF16E">tensorrt_llm::executor::DataType::kBF16 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE">tensorrt_llm::executor::DataType::kBOOL (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP16E">tensorrt_llm::executor::DataType::kFP16 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP32E">tensorrt_llm::executor::DataType::kFP32 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType4kFP8E">tensorrt_llm::executor::DataType::kFP8 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT32E">tensorrt_llm::executor::DataType::kINT32 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT64E">tensorrt_llm::executor::DataType::kINT64 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kINT8E">tensorrt_llm::executor::DataType::kINT8 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E">tensorrt_llm::executor::DataType::kUINT8 (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE">tensorrt_llm::executor::DataType::kUNKNOWN (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfigE">tensorrt_llm::executor::DebugConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig11DebugConfigEbb9StringVec10SizeType32">tensorrt_llm::executor::DebugConfig::DebugConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig20getDebugInputTensorsEv">tensorrt_llm::executor::DebugConfig::getDebugInputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig21getDebugOutputTensorsEv">tensorrt_llm::executor::DebugConfig::getDebugOutputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig19getDebugTensorNamesEv">tensorrt_llm::executor::DebugConfig::getDebugTensorNames (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig28getDebugTensorsMaxIterationsEv">tensorrt_llm::executor::DebugConfig::getDebugTensorsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig18mDebugInputTensorsE">tensorrt_llm::executor::DebugConfig::mDebugInputTensors (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig19mDebugOutputTensorsE">tensorrt_llm::executor::DebugConfig::mDebugOutputTensors (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig17mDebugTensorNamesE">tensorrt_llm::executor::DebugConfig::mDebugTensorNames (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig26mDebugTensorsMaxIterationsE">tensorrt_llm::executor::DebugConfig::mDebugTensorsMaxIterations (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfigeqERK11DebugConfig">tensorrt_llm::executor::DebugConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig20setDebugInputTensorsEb">tensorrt_llm::executor::DebugConfig::setDebugInputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig21setDebugOutputTensorsEb">tensorrt_llm::executor::DebugConfig::setDebugOutputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig19setDebugTensorNamesERK9StringVec">tensorrt_llm::executor::DebugConfig::setDebugTensorNames (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig28setDebugTensorsMaxIterationsE10SizeType32">tensorrt_llm::executor::DebugConfig::setDebugTensorsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig9StringVecE">tensorrt_llm::executor::DebugConfig::StringVec (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIterationE">tensorrt_llm::executor::DebugTensorsPerIteration (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration12debugTensorsE">tensorrt_llm::executor::DebugTensorsPerIteration::debugTensors (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration4iterE">tensorrt_llm::executor::DebugTensorsPerIteration::iter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfigE">tensorrt_llm::executor::DecodingConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14DecodingConfigENSt8optionalI12DecodingModeEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI13MedusaChoicesEENSt8optionalI11EagleConfigEE">tensorrt_llm::executor::DecodingConfig::DecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig31enableSeamlessLookaheadDecodingEv">tensorrt_llm::executor::DecodingConfig::enableSeamlessLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig15getDecodingModeEv">tensorrt_llm::executor::DecodingConfig::getDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig14getEagleConfigEv">tensorrt_llm::executor::DecodingConfig::getEagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig26getLookaheadDecodingConfigEv">tensorrt_llm::executor::DecodingConfig::getLookaheadDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig33getLookaheadDecodingMaxNumRequestEv">tensorrt_llm::executor::DecodingConfig::getLookaheadDecodingMaxNumRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig16getMedusaChoicesEv">tensorrt_llm::executor::DecodingConfig::getMedusaChoices (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig13mDecodingModeE">tensorrt_llm::executor::DecodingConfig::mDecodingMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig12mEagleConfigE">tensorrt_llm::executor::DecodingConfig::mEagleConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig24mLookaheadDecodingConfigE">tensorrt_llm::executor::DecodingConfig::mLookaheadDecodingConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig31mLookaheadDecodingMaxNumRequestE">tensorrt_llm::executor::DecodingConfig::mLookaheadDecodingMaxNumRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14mMedusaChoicesE">tensorrt_llm::executor::DecodingConfig::mMedusaChoices (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfigeqERK14DecodingConfig">tensorrt_llm::executor::DecodingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig15setDecodingModeERK12DecodingMode">tensorrt_llm::executor::DecodingConfig::setDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14setEagleConfigERK11EagleConfig">tensorrt_llm::executor::DecodingConfig::setEagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig26setLookaheadDecodingConfigERK23LookaheadDecodingConfig">tensorrt_llm::executor::DecodingConfig::setLookaheadDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig16setMedusaChoicesERK13MedusaChoices">tensorrt_llm::executor::DecodingConfig::setMedusaChoices (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingModeE">tensorrt_llm::executor::DecodingMode (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode9allBitSetE14UnderlyingType">tensorrt_llm::executor::DecodingMode::allBitSet (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode9anyBitSetE14UnderlyingType">tensorrt_llm::executor::DecodingMode::anyBitSet (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4AutoEv">tensorrt_llm::executor::DecodingMode::Auto (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode10BeamSearchEv">tensorrt_llm::executor::DecodingMode::BeamSearch (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12DecodingModeE14UnderlyingType">tensorrt_llm::executor::DecodingMode::DecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5EagleEv">tensorrt_llm::executor::DecodingMode::Eagle (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19ExplicitDraftTokensEv">tensorrt_llm::executor::DecodingMode::ExplicitDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19ExternalDraftTokensEv">tensorrt_llm::executor::DecodingMode::ExternalDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode7getNameEv">tensorrt_llm::executor::DecodingMode::getName (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode8getStateEv">tensorrt_llm::executor::DecodingMode::getState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isAutoEv">tensorrt_llm::executor::DecodingMode::isAuto (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isBeamSearchEv">tensorrt_llm::executor::DecodingMode::isBeamSearch (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode7isEagleEv">tensorrt_llm::executor::DecodingMode::isEagle (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExplicitDraftTokensEv">tensorrt_llm::executor::DecodingMode::isExplicitDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExternalDraftTokensEv">tensorrt_llm::executor::DecodingMode::isExternalDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode11isLookaheadEv">tensorrt_llm::executor::DecodingMode::isLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode8isMedusaEv">tensorrt_llm::executor::DecodingMode::isMedusa (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopKEv">tensorrt_llm::executor::DecodingMode::isTopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode13isTopKandTopPEv">tensorrt_llm::executor::DecodingMode::isTopKandTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isTopKorTopPEv">tensorrt_llm::executor::DecodingMode::isTopKorTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopPEv">tensorrt_llm::executor::DecodingMode::isTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseBanTokensEv">tensorrt_llm::executor::DecodingMode::isUseBanTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode13isUseBanWordsEv">tensorrt_llm::executor::DecodingMode::isUseBanWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUseExplicitEosStopEv">tensorrt_llm::executor::DecodingMode::isUseExplicitEosStop (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isUseFrequencyPenaltyEv">tensorrt_llm::executor::DecodingMode::isUseFrequencyPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode18isUseMaxLengthStopEv">tensorrt_llm::executor::DecodingMode::isUseMaxLengthStop (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseMinLengthEv">tensorrt_llm::executor::DecodingMode::isUseMinLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode9isUseMinPEv">tensorrt_llm::executor::DecodingMode::isUseMinP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseNoRepeatNgramSizeEv">tensorrt_llm::executor::DecodingMode::isUseNoRepeatNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseOccurrencePenaltyEv">tensorrt_llm::executor::DecodingMode::isUseOccurrencePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isUsePenaltyEv">tensorrt_llm::executor::DecodingMode::isUsePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUsePresencePenaltyEv">tensorrt_llm::executor::DecodingMode::isUsePresencePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseRepetitionPenaltyEv">tensorrt_llm::executor::DecodingMode::isUseRepetitionPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode17isUseStopCriteriaEv">tensorrt_llm::executor::DecodingMode::isUseStopCriteria (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseStopWordsEv">tensorrt_llm::executor::DecodingMode::isUseStopWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode16isUseTemperatureEv">tensorrt_llm::executor::DecodingMode::isUseTemperature (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode28isUseVariableBeamWidthSearchEv">tensorrt_llm::executor::DecodingMode::isUseVariableBeamWidthSearch (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kAutoE">tensorrt_llm::executor::DecodingMode::kAuto (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode11kBeamSearchE">tensorrt_llm::executor::DecodingMode::kBeamSearch (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6kEagleE">tensorrt_llm::executor::DecodingMode::kEagle (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20kExplicitDraftTokensE">tensorrt_llm::executor::DecodingMode::kExplicitDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20kExternalDraftTokensE">tensorrt_llm::executor::DecodingMode::kExternalDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode10kLookaheadE">tensorrt_llm::executor::DecodingMode::kLookahead (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode7kMedusaE">tensorrt_llm::executor::DecodingMode::kMedusa (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9kNumFlagsE">tensorrt_llm::executor::DecodingMode::kNumFlags (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopKE">tensorrt_llm::executor::DecodingMode::kTopK (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9kTopKTopPE">tensorrt_llm::executor::DecodingMode::kTopKTopP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopPE">tensorrt_llm::executor::DecodingMode::kTopP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseBanTokensE">tensorrt_llm::executor::DecodingMode::kUseBanTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12kUseBanWordsE">tensorrt_llm::executor::DecodingMode::kUseBanWords (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19kUseExplicitEosStopE">tensorrt_llm::executor::DecodingMode::kUseExplicitEosStop (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode22kUseFrequencyPenaltiesE">tensorrt_llm::executor::DecodingMode::kUseFrequencyPenalties (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode17kUseMaxLengthStopE">tensorrt_llm::executor::DecodingMode::kUseMaxLengthStop (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseMinLengthE">tensorrt_llm::executor::DecodingMode::kUseMinLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode8kUseMinPE">tensorrt_llm::executor::DecodingMode::kUseMinP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode21kUseNoRepeatNgramSizeE">tensorrt_llm::executor::DecodingMode::kUseNoRepeatNgramSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseOccurrencePenaltiesE">tensorrt_llm::executor::DecodingMode::kUseOccurrencePenalties (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUsePenaltiesE">tensorrt_llm::executor::DecodingMode::kUsePenalties (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode21kUsePresencePenaltiesE">tensorrt_llm::executor::DecodingMode::kUsePresencePenalties (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseRepetitionPenaltiesE">tensorrt_llm::executor::DecodingMode::kUseRepetitionPenalties (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode24kUseStandardStopCriteriaE">tensorrt_llm::executor::DecodingMode::kUseStandardStopCriteria (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseStopWordsE">tensorrt_llm::executor::DecodingMode::kUseStopWords (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode15kUseTemperatureE">tensorrt_llm::executor::DecodingMode::kUseTemperature (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode27kUseVariableBeamWidthSearchE">tensorrt_llm::executor::DecodingMode::kUseVariableBeamWidthSearch (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9LookaheadEv">tensorrt_llm::executor::DecodingMode::Lookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6MedusaEv">tensorrt_llm::executor::DecodingMode::Medusa (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6mStateE">tensorrt_llm::executor::DecodingMode::mState (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingModeeqERK12DecodingMode">tensorrt_llm::executor::DecodingMode::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode8setBitToE14UnderlyingTypeb">tensorrt_llm::executor::DecodingMode::setBitTo (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4TopKEv">tensorrt_llm::executor::DecodingMode::TopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode8TopKTopPEv">tensorrt_llm::executor::DecodingMode::TopKTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4TopPEv">tensorrt_llm::executor::DecodingMode::TopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode14UnderlyingTypeE">tensorrt_llm::executor::DecodingMode::UnderlyingType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useBanTokensEb">tensorrt_llm::executor::DecodingMode::useBanTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode11useBanWordsEb">tensorrt_llm::executor::DecodingMode::useBanWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode18useExplicitEosStopEb">tensorrt_llm::executor::DecodingMode::useExplicitEosStop (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19useFrequencyPenaltyEb">tensorrt_llm::executor::DecodingMode::useFrequencyPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode16useMaxLengthStopEb">tensorrt_llm::executor::DecodingMode::useMaxLengthStop (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useMinLengthEb">tensorrt_llm::executor::DecodingMode::useMinLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode7useMinPEb">tensorrt_llm::executor::DecodingMode::useMinP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20useNoRepeatNgramSizeEb">tensorrt_llm::executor::DecodingMode::useNoRepeatNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode22useOccurrencePenaltiesEb">tensorrt_llm::executor::DecodingMode::useOccurrencePenalties (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode18usePresencePenaltyEb">tensorrt_llm::executor::DecodingMode::usePresencePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20useRepetitionPenaltyEb">tensorrt_llm::executor::DecodingMode::useRepetitionPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useStopWordsEb">tensorrt_llm::executor::DecodingMode::useStopWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode14useTemperatureEb">tensorrt_llm::executor::DecodingMode::useTemperature (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode26useVariableBeamWidthSearchEb">tensorrt_llm::executor::DecodingMode::useVariableBeamWidthSearch (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detailE">tensorrt_llm::executor::detail (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9DimType64E">tensorrt_llm::executor::detail::DimType64 (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::detail::ofITensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor">tensorrt_llm::executor::detail::toITensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executorE">tensorrt_llm::executor::disagg_executor (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator21awaitContextResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::awaitContextResponses (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator24awaitGenerationResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::awaitGenerationResponses (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator10canEnqueueEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::canEnqueue (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator26DisaggExecutorOrchestratorERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorIN8executor14ExecutorConfigEEERKNSt6vectorIN8executor14ExecutorConfigEEEbb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::DisaggExecutorOrchestrator (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator14enqueueContextERKNSt6vectorIN5texec7RequestEEENSt8optionalIiEEb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::enqueueContext (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator17enqueueGenerationERKNSt6vectorIN5texec7RequestEEERKNSt6vectorI6IdTypeEENSt8optionalIiEEb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::enqueueGeneration (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator19getContextExecutorsEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::getContextExecutors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator15getGenExecutorsEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::getGenExecutors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator5mImplE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::mImpl (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorD0Ev">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::~DisaggExecutorOrchestrator (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdE">tensorrt_llm::executor::disagg_executor::ResponseWithId (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId3gidE">tensorrt_llm::executor::disagg_executor::ResponseWithId::gid (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERK14ResponseWithId">tensorrt_llm::executor::disagg_executor::ResponseWithId::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERR14ResponseWithId">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId8responseE">tensorrt_llm::executor::disagg_executor::ResponseWithId::response (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERK14ResponseWithId">tensorrt_llm::executor::disagg_executor::ResponseWithId::ResponseWithId (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERKN12tensorrt_llm8executor8ResponseE6IdType">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERR14ResponseWithId">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERRN12tensorrt_llm8executor8ResponseE6IdType">[3]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdD0Ev">tensorrt_llm::executor::disagg_executor::ResponseWithId::~ResponseWithId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22DisServingRequestStatsE">tensorrt_llm::executor::DisServingRequestStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22DisServingRequestStats11kvCacheSizeE">tensorrt_llm::executor::DisServingRequestStats::kvCacheSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22DisServingRequestStats17kvCacheTransferMSE">tensorrt_llm::executor::DisServingRequestStats::kvCacheTransferMS (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfigE">tensorrt_llm::executor::DynamicBatchConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig18DynamicBatchConfigEbb10SizeType32NSt6vectorINSt4pairI10SizeType3210SizeType32EEEE">tensorrt_llm::executor::DynamicBatchConfig::DynamicBatchConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig17getBatchSizeTableEv">tensorrt_llm::executor::DynamicBatchConfig::getBatchSizeTable (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig34getDynamicBatchMovingAverageWindowEv">tensorrt_llm::executor::DynamicBatchConfig::getDynamicBatchMovingAverageWindow (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig24getEnableBatchSizeTuningEv">tensorrt_llm::executor::DynamicBatchConfig::getEnableBatchSizeTuning (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig27getEnableMaxNumTokensTuningEv">tensorrt_llm::executor::DynamicBatchConfig::getEnableMaxNumTokensTuning (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22kDefaultBatchSizeTableE">tensorrt_llm::executor::DynamicBatchConfig::kDefaultBatchSizeTable (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig39kDefaultDynamicBatchMovingAverageWindowE">tensorrt_llm::executor::DynamicBatchConfig::kDefaultDynamicBatchMovingAverageWindow (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig15mBatchSizeTableE">tensorrt_llm::executor::DynamicBatchConfig::mBatchSizeTable (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig32mDynamicBatchMovingAverageWindowE">tensorrt_llm::executor::DynamicBatchConfig::mDynamicBatchMovingAverageWindow (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22mEnableBatchSizeTuningE">tensorrt_llm::executor::DynamicBatchConfig::mEnableBatchSizeTuning (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig25mEnableMaxNumTokensTuningE">tensorrt_llm::executor::DynamicBatchConfig::mEnableMaxNumTokensTuning (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12EagleChoicesE">tensorrt_llm::executor::EagleChoices (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfigE">tensorrt_llm::executor::EagleConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig19checkPosteriorValueERKNSt8optionalIfEE">tensorrt_llm::executor::EagleConfig::checkPosteriorValue (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig11EagleConfigENSt8optionalI12EagleChoicesEEbNSt8optionalIfEEbNSt8optionalI10SizeType32EE">tensorrt_llm::executor::EagleConfig::EagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig21getDynamicTreeMaxTopKEv">tensorrt_llm::executor::EagleConfig::getDynamicTreeMaxTopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig15getEagleChoicesEv">tensorrt_llm::executor::EagleConfig::getEagleChoices (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig21getPosteriorThresholdEv">tensorrt_llm::executor::EagleConfig::getPosteriorThreshold (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig16isGreedySamplingEv">tensorrt_llm::executor::EagleConfig::isGreedySampling (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig19mDynamicTreeMaxTopKE">tensorrt_llm::executor::EagleConfig::mDynamicTreeMaxTopK (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig13mEagleChoicesE">tensorrt_llm::executor::EagleConfig::mEagleChoices (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig15mGreedySamplingE">tensorrt_llm::executor::EagleConfig::mGreedySampling (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig19mPosteriorThresholdE">tensorrt_llm::executor::EagleConfig::mPosteriorThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig15mUseDynamicTreeE">tensorrt_llm::executor::EagleConfig::mUseDynamicTree (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfigeqERK11EagleConfig">tensorrt_llm::executor::EagleConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig14useDynamicTreeEv">tensorrt_llm::executor::EagleConfig::useDynamicTree (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorE">tensorrt_llm::executor::Executor (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERK6IdTypeRKNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::Executor::awaitResponses (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt6vectorI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalINSt6chrono12millisecondsEEE">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType">tensorrt_llm::executor::Executor::cancelRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv">tensorrt_llm::executor::Executor::canEnqueueRequests (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request">tensorrt_llm::executor::Executor::enqueueRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE">tensorrt_llm::executor::Executor::enqueueRequests (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEENSt10shared_ptrI5ModelEERK14ExecutorConfig">tensorrt_llm::executor::Executor::Executor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfigRKNSt8optionalINSt3mapINSt6stringE6TensorEEEE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfig">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK8Executor">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERR8Executor">[7]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor22getKVCacheEventManagerEv">tensorrt_llm::executor::Executor::getKVCacheEventManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor21getLatestDebugTensorsEv">tensorrt_llm::executor::Executor::getLatestDebugTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv">tensorrt_llm::executor::Executor::getLatestIterationStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv">tensorrt_llm::executor::Executor::getLatestRequestStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE">tensorrt_llm::executor::Executor::getNumResponsesReady (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor13isParticipantEv">tensorrt_llm::executor::Executor::isParticipant (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor5mImplE">tensorrt_llm::executor::Executor::mImpl (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutoraSERK8Executor">tensorrt_llm::executor::Executor::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutoraSERR8Executor">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv">tensorrt_llm::executor::Executor::shutdown (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev">tensorrt_llm::executor::Executor::~Executor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfigE">tensorrt_llm::executor::ExecutorConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE10SizeType3215SchedulerConfig13KvCacheConfigbb10SizeType3210SizeType3212BatchingTypeNSt8optionalI10SizeType32EENSt8optionalI10SizeType32EENSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI25LogitsPostProcessorConfigEENSt8optionalI14DecodingConfigEEbfNSt8optionalI10SizeType32EERK29ExtendedRuntimePerfKnobConfigNSt8optionalI11DebugConfigEE10SizeType328uint64_tNSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI20GuidedDecodingConfigEENSt8optionalINSt6vectorI21AdditionalModelOutputEEEENSt8optionalI22CacheTransceiverConfigEEbbbb">tensorrt_llm::executor::ExecutorConfig::ExecutorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getAdditionalModelOutputsEv">tensorrt_llm::executor::ExecutorConfig::getAdditionalModelOutputs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv">tensorrt_llm::executor::ExecutorConfig::getBatchingType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getCacheTransceiverConfigEv">tensorrt_llm::executor::ExecutorConfig::getCacheTransceiverConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig14getDebugConfigEv">tensorrt_llm::executor::ExecutorConfig::getDebugConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getDecodingConfigEv">tensorrt_llm::executor::ExecutorConfig::getDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv">tensorrt_llm::executor::ExecutorConfig::getEnableChunkedContext (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig19getEnableTrtOverlapEv">tensorrt_llm::executor::ExecutorConfig::getEnableTrtOverlap (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig32getExtendedRuntimePerfKnobConfigEv">tensorrt_llm::executor::ExecutorConfig::getExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig36getFailFastOnAttentionWindowTooLargeEv">tensorrt_llm::executor::ExecutorConfig::getFailFastOnAttentionWindowTooLarge (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getGatherGenerationLogitsEv">tensorrt_llm::executor::ExecutorConfig::getGatherGenerationLogits (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getGpuWeightsPercentEv">tensorrt_llm::executor::ExecutorConfig::getGpuWeightsPercent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getGuidedDecodingConfigEv">tensorrt_llm::executor::ExecutorConfig::getGuidedDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getIterStatsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getKvCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig19getKvCacheConfigRefEv">tensorrt_llm::executor::ExecutorConfig::getKvCacheConfigRef (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getLogitsPostProcessorConfigEv">tensorrt_llm::executor::ExecutorConfig::getLogitsPostProcessorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBatchSizeEv">tensorrt_llm::executor::ExecutorConfig::getMaxBatchSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv">tensorrt_llm::executor::ExecutorConfig::getMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxNumTokensEv">tensorrt_llm::executor::ExecutorConfig::getMaxNumTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxQueueSizeEv">tensorrt_llm::executor::ExecutorConfig::getMaxQueueSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getMaxSeqIdleMicrosecondsEv">tensorrt_llm::executor::ExecutorConfig::getMaxSeqIdleMicroseconds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv">tensorrt_llm::executor::ExecutorConfig::getNormalizeLogProbs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv">tensorrt_llm::executor::ExecutorConfig::getParallelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getPeftCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig24getPromptTableOffloadingEv">tensorrt_llm::executor::ExecutorConfig::getPromptTableOffloading (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig19getRecvPollPeriodMsEv">tensorrt_llm::executor::ExecutorConfig::getRecvPollPeriodMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getRequestStatsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv">tensorrt_llm::executor::ExecutorConfig::getSchedulerConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21getSchedulerConfigRefEv">tensorrt_llm::executor::ExecutorConfig::getSchedulerConfigRef (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getSpecDecConfigEv">tensorrt_llm::executor::ExecutorConfig::getSpecDecConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig22getUseGpuDirectStorageEv">tensorrt_llm::executor::ExecutorConfig::getUseGpuDirectStorage (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultIterStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::kDefaultIterStatsMaxIterations (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultMaxSeqIdleMicrosecondsE">tensorrt_llm::executor::ExecutorConfig::kDefaultMaxSeqIdleMicroseconds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig33kDefaultRequestStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::kDefaultRequestStatsMaxIterations (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mAdditionalModelOutputsE">tensorrt_llm::executor::ExecutorConfig::mAdditionalModelOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE">tensorrt_llm::executor::ExecutorConfig::mBatchingType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mCacheTransceiverConfigE">tensorrt_llm::executor::ExecutorConfig::mCacheTransceiverConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig12mDebugConfigE">tensorrt_llm::executor::ExecutorConfig::mDebugConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mDecodingConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE">tensorrt_llm::executor::ExecutorConfig::mEnableChunkedContext (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17mEnableTrtOverlapE">tensorrt_llm::executor::ExecutorConfig::mEnableTrtOverlap (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30mExtendedRuntimePerfKnobConfigE">tensorrt_llm::executor::ExecutorConfig::mExtendedRuntimePerfKnobConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig34mFailFastOnAttentionWindowTooLargeE">tensorrt_llm::executor::ExecutorConfig::mFailFastOnAttentionWindowTooLarge (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mGatherGenerationLogitsE">tensorrt_llm::executor::ExecutorConfig::mGatherGenerationLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mGpuWeightsPercentE">tensorrt_llm::executor::ExecutorConfig::mGpuWeightsPercent (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mGuidedDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mGuidedDecodingConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mIterStatsMaxIterations (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mKvCacheConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mLogitsPostProcessorConfigE">tensorrt_llm::executor::ExecutorConfig::mLogitsPostProcessorConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBatchSizeE">tensorrt_llm::executor::ExecutorConfig::mMaxBatchSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE">tensorrt_llm::executor::ExecutorConfig::mMaxBeamWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxNumTokensE">tensorrt_llm::executor::ExecutorConfig::mMaxNumTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxQueueSizeE">tensorrt_llm::executor::ExecutorConfig::mMaxQueueSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mMaxSeqIdleMicrosecondsE">tensorrt_llm::executor::ExecutorConfig::mMaxSeqIdleMicroseconds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE">tensorrt_llm::executor::ExecutorConfig::mNormalizeLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE">tensorrt_llm::executor::ExecutorConfig::mParallelConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mPeftCacheConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig22mPromptTableOffloadingE">tensorrt_llm::executor::ExecutorConfig::mPromptTableOffloading (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17mRecvPollPeriodMsE">tensorrt_llm::executor::ExecutorConfig::mRecvPollPeriodMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mRequestStatsMaxIterations (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE">tensorrt_llm::executor::ExecutorConfig::mSchedulerConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mSpeculativeDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mSpeculativeDecodingConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20mUseGpuDirectStorageE">tensorrt_llm::executor::ExecutorConfig::mUseGpuDirectStorage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setAdditionalModelOutputsERKNSt6vectorI21AdditionalModelOutputEE">tensorrt_llm::executor::ExecutorConfig::setAdditionalModelOutputs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType">tensorrt_llm::executor::ExecutorConfig::setBatchingType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setCacheTransceiverConfigERK22CacheTransceiverConfig">tensorrt_llm::executor::ExecutorConfig::setCacheTransceiverConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14setDebugConfigERK11DebugConfig">tensorrt_llm::executor::ExecutorConfig::setDebugConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setDecodingConfigERK14DecodingConfig">tensorrt_llm::executor::ExecutorConfig::setDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb">tensorrt_llm::executor::ExecutorConfig::setEnableChunkedContext (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig19setEnableTrtOverlapEb">tensorrt_llm::executor::ExecutorConfig::setEnableTrtOverlap (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig32setExtendedRuntimePerfKnobConfigERK29ExtendedRuntimePerfKnobConfig">tensorrt_llm::executor::ExecutorConfig::setExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig36setFailFastOnAttentionWindowTooLargeEb">tensorrt_llm::executor::ExecutorConfig::setFailFastOnAttentionWindowTooLarge (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setGatherGenerationLogitsEb">tensorrt_llm::executor::ExecutorConfig::setGatherGenerationLogits (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setGpuWeightsPercentERKf">tensorrt_llm::executor::ExecutorConfig::setGpuWeightsPercent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setGuidedDecodingConfigERK20GuidedDecodingConfig">tensorrt_llm::executor::ExecutorConfig::setGuidedDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setIterStatsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig">tensorrt_llm::executor::ExecutorConfig::setKvCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setLogitsPostProcessorConfigERK25LogitsPostProcessorConfig">tensorrt_llm::executor::ExecutorConfig::setLogitsPostProcessorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBatchSizeE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxBatchSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxNumTokensE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxNumTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxQueueSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::ExecutorConfig::setMaxQueueSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setMaxSeqIdleMicrosecondsE8uint64_t">tensorrt_llm::executor::ExecutorConfig::setMaxSeqIdleMicroseconds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb">tensorrt_llm::executor::ExecutorConfig::setNormalizeLogProbs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig">tensorrt_llm::executor::ExecutorConfig::setParallelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig">tensorrt_llm::executor::ExecutorConfig::setPeftCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig24setPromptTableOffloadingEb">tensorrt_llm::executor::ExecutorConfig::setPromptTableOffloading (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig19setRecvPollPeriodMsERK10SizeType32">tensorrt_llm::executor::ExecutorConfig::setRecvPollPeriodMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setRequestStatsMaxIterations (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig">tensorrt_llm::executor::ExecutorConfig::setSchedulerConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setSpecDecConfigERK25SpeculativeDecodingConfig">tensorrt_llm::executor::ExecutorConfig::setSpecDecConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig22setUseGpuDirectStorageERKb">tensorrt_llm::executor::ExecutorConfig::setUseGpuDirectStorage (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig29ExtendedRuntimePerfKnobConfigEbbb10SizeType32">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::ExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21getCudaGraphCacheSizeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getCudaGraphCacheSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16getCudaGraphModeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getCudaGraphMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27getEnableContextFMHAFP32AccEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getEnableContextFMHAFP32Acc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17getMultiBlockModeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getMultiBlockMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig19mCudaGraphCacheSizeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mCudaGraphCacheSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig14mCudaGraphModeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mCudaGraphMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig25mEnableContextFMHAFP32AccE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mEnableContextFMHAFP32Acc (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig15mMultiBlockModeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mMultiBlockMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigeqERK29ExtendedRuntimePerfKnobConfig">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21setCudaGraphCacheSizeE10SizeType32">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setCudaGraphCacheSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16setCudaGraphModeEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setCudaGraphMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27setEnableContextFMHAFP32AccEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setEnableContextFMHAFP32Acc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17setMultiBlockModeEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setMultiBlockMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfigE">tensorrt_llm::executor::ExternalDraftTokensConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig25ExternalDraftTokensConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEERKNSt8optionalIbEE">tensorrt_llm::executor::ExternalDraftTokensConfig::ExternalDraftTokensConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig22getAcceptanceThresholdEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getAcceptanceThreshold (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig13getFastLogitsEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getFastLogits (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getLogitsEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getLogits (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getTokensEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig20mAcceptanceThresholdE">tensorrt_llm::executor::ExternalDraftTokensConfig::mAcceptanceThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig11mFastLogitsE">tensorrt_llm::executor::ExternalDraftTokensConfig::mFastLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mLogitsE">tensorrt_llm::executor::ExternalDraftTokensConfig::mLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mTokensE">tensorrt_llm::executor::ExternalDraftTokensConfig::mTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReasonE">tensorrt_llm::executor::FinishReason (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason10kCANCELLEDE">tensorrt_llm::executor::FinishReason::kCANCELLED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason7kEND_IDE">tensorrt_llm::executor::FinishReason::kEND_ID (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason7kLENGTHE">tensorrt_llm::executor::FinishReason::kLENGTH (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason13kNOT_FINISHEDE">tensorrt_llm::executor::FinishReason::kNOT_FINISHED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason11kSTOP_WORDSE">tensorrt_llm::executor::FinishReason::kSTOP_WORDS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason10kTIMED_OUTE">tensorrt_llm::executor::FinishReason::kTIMED_OUT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9FloatTypeE">tensorrt_llm::executor::FloatType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfigE">tensorrt_llm::executor::GuidedDecodingConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig10getBackendEv">tensorrt_llm::executor::GuidedDecodingConfig::getBackend (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getEncodedVocabEv">tensorrt_llm::executor::GuidedDecodingConfig::getEncodedVocab (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getStopTokenIdsEv">tensorrt_llm::executor::GuidedDecodingConfig::getStopTokenIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getTokenizerStrEv">tensorrt_llm::executor::GuidedDecodingConfig::getTokenizerStr (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackendE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingBackend (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackend11kLLGUIDANCEE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingBackend::kLLGUIDANCE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackend9kXGRAMMARE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingBackend::kXGRAMMAR (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig20GuidedDecodingConfigE21GuidedDecodingBackendNSt8optionalINSt6vectorINSt6stringEEEEENSt8optionalINSt6stringEEENSt8optionalINSt6vectorI11TokenIdTypeEEEE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig8mBackendE">tensorrt_llm::executor::GuidedDecodingConfig::mBackend (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mEncodedVocabE">tensorrt_llm::executor::GuidedDecodingConfig::mEncodedVocab (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mStopTokenIdsE">tensorrt_llm::executor::GuidedDecodingConfig::mStopTokenIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mTokenizerStrE">tensorrt_llm::executor::GuidedDecodingConfig::mTokenizerStr (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfigeqERK20GuidedDecodingConfig">tensorrt_llm::executor::GuidedDecodingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig10setBackendERK21GuidedDecodingBackend">tensorrt_llm::executor::GuidedDecodingConfig::setBackend (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setEncodedVocabERKNSt6vectorINSt6stringEEE">tensorrt_llm::executor::GuidedDecodingConfig::setEncodedVocab (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setStopTokenIdsERKNSt6vectorI11TokenIdTypeEE">tensorrt_llm::executor::GuidedDecodingConfig::setStopTokenIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setTokenizerStrERKNSt6stringE">tensorrt_llm::executor::GuidedDecodingConfig::setTokenizerStr (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig8validateEv">tensorrt_llm::executor::GuidedDecodingConfig::validate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParamsE">tensorrt_llm::executor::GuidedDecodingParams (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams8getGuideEv">tensorrt_llm::executor::GuidedDecodingParams::getGuide (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams12getGuideTypeEv">tensorrt_llm::executor::GuidedDecodingParams::getGuideType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams20GuidedDecodingParamsE9GuideTypeNSt8optionalINSt6stringEEE">tensorrt_llm::executor::GuidedDecodingParams::GuidedDecodingParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideTypeE">tensorrt_llm::executor::GuidedDecodingParams::GuideType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType13kEBNF_GRAMMARE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kEBNF_GRAMMAR (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType5kJSONE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kJSON (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType12kJSON_SCHEMAE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kJSON_SCHEMA (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType6kREGEXE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kREGEX (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType15kSTRUCTURAL_TAGE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kSTRUCTURAL_TAG (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams6mGuideE">tensorrt_llm::executor::GuidedDecodingParams::mGuide (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams10mGuideTypeE">tensorrt_llm::executor::GuidedDecodingParams::mGuideType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParamseqERK20GuidedDecodingParams">tensorrt_llm::executor::GuidedDecodingParams::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6IdTypeE">tensorrt_llm::executor::IdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE">tensorrt_llm::executor::InflightBatchingStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats26avgNumDecodedTokensPerIterE">tensorrt_llm::executor::InflightBatchingStats::avgNumDecodedTokensPerIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE">tensorrt_llm::executor::InflightBatchingStats::microBatchId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE">tensorrt_llm::executor::InflightBatchingStats::numContextRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE">tensorrt_llm::executor::InflightBatchingStats::numCtxTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE">tensorrt_llm::executor::InflightBatchingStats::numGenRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE">tensorrt_llm::executor::InflightBatchingStats::numPausedRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::InflightBatchingStats::numScheduledRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStatsE">tensorrt_llm::executor::IterationStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE">tensorrt_llm::executor::IterationStats::cpuMemUsage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17crossKvCacheStatsE">tensorrt_llm::executor::IterationStats::crossKvCacheStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE">tensorrt_llm::executor::IterationStats::gpuMemUsage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE">tensorrt_llm::executor::IterationStats::inflightBatchingStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats4iterE">tensorrt_llm::executor::IterationStats::iter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats13iterLatencyMSE">tensorrt_llm::executor::IterationStats::iterLatencyMS (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE">tensorrt_llm::executor::IterationStats::kvCacheStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19maxBatchSizeRuntimeE">tensorrt_llm::executor::IterationStats::maxBatchSizeRuntime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats18maxBatchSizeStaticE">tensorrt_llm::executor::IterationStats::maxBatchSizeStatic (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats28maxBatchSizeTunerRecommendedE">tensorrt_llm::executor::IterationStats::maxBatchSizeTunerRecommended (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE">tensorrt_llm::executor::IterationStats::maxNumActiveRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19maxNumTokensRuntimeE">tensorrt_llm::executor::IterationStats::maxNumTokensRuntime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats18maxNumTokensStaticE">tensorrt_llm::executor::IterationStats::maxNumTokensStatic (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats28maxNumTokensTunerRecommendedE">tensorrt_llm::executor::IterationStats::maxNumTokensTunerRecommended (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats31newActiveRequestsQueueLatencyMSE">tensorrt_llm::executor::IterationStats::newActiveRequestsQueueLatencyMS (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE">tensorrt_llm::executor::IterationStats::numActiveRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20numCompletedRequestsE">tensorrt_llm::executor::IterationStats::numCompletedRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20numNewActiveRequestsE">tensorrt_llm::executor::IterationStats::numNewActiveRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17numQueuedRequestsE">tensorrt_llm::executor::IterationStats::numQueuedRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE">tensorrt_llm::executor::IterationStats::pinnedMemUsage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17specDecodingStatsE">tensorrt_llm::executor::IterationStats::specDecodingStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE">tensorrt_llm::executor::IterationStats::staticBatchingStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE">tensorrt_llm::executor::IterationStats::timestamp (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13IterationTypeE">tensorrt_llm::executor::IterationType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerializationE">tensorrt_llm::executor::JsonSerialization (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats">tensorrt_llm::executor::JsonSerialization::toJsonStr (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cacheE">tensorrt_llm::executor::kv_cache (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cacheE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cacheE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cacheE">[3]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDescE">tensorrt_llm::executor::kv_cache::AgentDesc (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDesc9AgentDescENSt6stringE">tensorrt_llm::executor::kv_cache::AgentDesc::AgentDesc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9AgentDesc19getBackendAgentDescEv">tensorrt_llm::executor::kv_cache::AgentDesc::getBackendAgentDesc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDesc17mBackendAgentDescE">tensorrt_llm::executor::kv_cache::AgentDesc::mBackendAgentDesc (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10AgentStateE">tensorrt_llm::executor::kv_cache::AgentState (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10AgentStateENSt6stringENSt6stringE">tensorrt_llm::executor::kv_cache::AgentState::AgentState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10AgentStateEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10mAgentNameE">tensorrt_llm::executor::kv_cache::AgentState::mAgentName (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState15mConnectionInfoE">tensorrt_llm::executor::kv_cache::AgentState::mConnectionInfo (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10AgentStateeqERK10AgentState">tensorrt_llm::executor::kv_cache::AgentState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10AgentState8toStringEv">tensorrt_llm::executor::kv_cache::AgentState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfigE">tensorrt_llm::executor::kv_cache::BaseAgentConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig5mNameE">tensorrt_llm::executor::kv_cache::BaseAgentConfig::mName (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig11multiThreadE">tensorrt_llm::executor::kv_cache::BaseAgentConfig::multiThread (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig13useProgThreadE">tensorrt_llm::executor::kv_cache::BaseAgentConfig::useProgThread (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgentE">tensorrt_llm::executor::kv_cache::BaseLoopbackAgent (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgent22executeLoopbackRequestERK11MemoryDescsRK9FileDescsb">tensorrt_llm::executor::kv_cache::BaseLoopbackAgent::executeLoopbackRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgentD0Ev">tensorrt_llm::executor::kv_cache::BaseLoopbackAgent::~BaseLoopbackAgent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgentE">tensorrt_llm::executor::kv_cache::BaseTransferAgent (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent16checkRemoteDescsERKNSt6stringERK11MemoryDescs">tensorrt_llm::executor::kv_cache::BaseTransferAgent::checkRemoteDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent16deregisterMemoryERK13RegisterDescs">tensorrt_llm::executor::kv_cache::BaseTransferAgent::deregisterMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent17getLocalAgentDescEv">tensorrt_llm::executor::kv_cache::BaseTransferAgent::getLocalAgentDesc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent22getLocalConnectionInfoEv">tensorrt_llm::executor::kv_cache::BaseTransferAgent::getLocalConnectionInfo (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent23getNotifiedSyncMessagesEv">tensorrt_llm::executor::kv_cache::BaseTransferAgent::getNotifiedSyncMessages (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent21invalidateRemoteAgentERKNSt6stringE">tensorrt_llm::executor::kv_cache::BaseTransferAgent::invalidateRemoteAgent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent15loadRemoteAgentERKNSt6stringERK18ConnectionInfoType">tensorrt_llm::executor::kv_cache::BaseTransferAgent::loadRemoteAgent (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent15loadRemoteAgentERKNSt6stringERK9AgentDesc">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent17notifySyncMessageERKNSt6stringERK11SyncMessage">tensorrt_llm::executor::kv_cache::BaseTransferAgent::notifySyncMessage (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent14registerMemoryERK13RegisterDescs">tensorrt_llm::executor::kv_cache::BaseTransferAgent::registerMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent22submitTransferRequestsERK15TransferRequest">tensorrt_llm::executor::kv_cache::BaseTransferAgent::submitTransferRequests (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgentD0Ev">tensorrt_llm::executor::kv_cache::BaseTransferAgent::~BaseTransferAgent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheStateE">tensorrt_llm::executor::kv_cache::CacheState (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfigE">tensorrt_llm::executor::kv_cache::CacheState::AttentionConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig15AttentionConfigE13AttentionTypei">tensorrt_llm::executor::kv_cache::CacheState::AttentionConfig::AttentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig14mAttentionTypeE">tensorrt_llm::executor::kv_cache::CacheState::AttentionConfig::mAttentionType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig9mKvFactorE">tensorrt_llm::executor::kv_cache::CacheState::AttentionConfig::mKvFactor (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfigeqERK15AttentionConfig">tensorrt_llm::executor::kv_cache::CacheState::AttentionConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionTypeE">tensorrt_llm::executor::kv_cache::CacheState::AttentionType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionType8kDEFAULTE">tensorrt_llm::executor::kv_cache::CacheState::AttentionType::kDEFAULT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionType4kMLAE">tensorrt_llm::executor::kv_cache::CacheState::AttentionType::kMLA (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeibiib">tensorrt_llm::executor::kv_cache::CacheState::CacheState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateE11ModelConfigRKN7runtime11WorldConfigERKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeib">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateENSt6vectorI10SizeType32EE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeibiib">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState18getAttentionConfigEv">tensorrt_llm::executor::kv_cache::CacheState::getAttentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState11getDataTypeEv">tensorrt_llm::executor::kv_cache::CacheState::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState19getEnableBlockReuseEv">tensorrt_llm::executor::kv_cache::CacheState::getEnableBlockReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState14getModelConfigEv">tensorrt_llm::executor::kv_cache::CacheState::getModelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState17getParallelConfigEv">tensorrt_llm::executor::kv_cache::CacheState::getParallelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState16mAttentionConfigE">tensorrt_llm::executor::kv_cache::CacheState::mAttentionConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState9mDataTypeE">tensorrt_llm::executor::kv_cache::CacheState::mDataType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState17mEnableBlockReuseE">tensorrt_llm::executor::kv_cache::CacheState::mEnableBlockReuse (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState12mModelConfigE">tensorrt_llm::executor::kv_cache::CacheState::mModelConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfigE">tensorrt_llm::executor::kv_cache::CacheState::ModelConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig18mNbKvHeadsPerLayerE">tensorrt_llm::executor::kv_cache::CacheState::ModelConfig::mNbKvHeadsPerLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig12mSizePerHeadE">tensorrt_llm::executor::kv_cache::CacheState::ModelConfig::mSizePerHead (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig15mTokensPerBlockE">tensorrt_llm::executor::kv_cache::CacheState::ModelConfig::mTokensPerBlock (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState11ModelConfigeqERK11ModelConfig">tensorrt_llm::executor::kv_cache::CacheState::ModelConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15mParallelConfigE">tensorrt_llm::executor::kv_cache::CacheState::mParallelConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheStateeqERKN8kv_cache10CacheStateE">tensorrt_llm::executor::kv_cache::CacheState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfigE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig23mAttentionLayerNumPerPPE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mAttentionLayerNumPerPP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig19mContextParallelismE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mContextParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig7mDPrankE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mDPrank (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig7mDPsizeE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mDPsize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig18mEnableAttentionDPE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mEnableAttentionDP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig20mPipelineParallelismE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mPipelineParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig18mTensorParallelismE">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::mTensorParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfigeqERK14ParallelConfig">tensorrt_llm::executor::kv_cache::CacheState::ParallelConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState8toStringEv">tensorrt_llm::executor::kv_cache::CacheState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommStateE">tensorrt_llm::executor::kv_cache::CommState (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI10AgentStateEEi">tensorrt_llm::executor::kv_cache::CommState::CommState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI10SizeType32EEi">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI11SocketStateEEi">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt8uint16_tENSt6stringE">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateEv">[4]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState13getAgentStateEv">tensorrt_llm::executor::kv_cache::CommState::getAgentState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState11getMpiStateEv">tensorrt_llm::executor::kv_cache::CommState::getMpiState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState10getSelfIdxEv">tensorrt_llm::executor::kv_cache::CommState::getSelfIdx (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState14getSocketStateEv">tensorrt_llm::executor::kv_cache::CommState::getSocketState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState12isAgentStateEv">tensorrt_llm::executor::kv_cache::CommState::isAgentState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState10isMpiStateEv">tensorrt_llm::executor::kv_cache::CommState::isMpiState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState13isSocketStateEv">tensorrt_llm::executor::kv_cache::CommState::isSocketState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState8mSelfIdxE">tensorrt_llm::executor::kv_cache::CommState::mSelfIdx (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9CommState6mStateE">tensorrt_llm::executor::kv_cache::CommState::mState (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommStateeqERK9CommState">tensorrt_llm::executor::kv_cache::CommState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState8toStringEv">tensorrt_llm::executor::kv_cache::CommState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10ConnectionE">tensorrt_llm::executor::kv_cache::Connection (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection12isThreadSafeEv">tensorrt_llm::executor::kv_cache::Connection::isThreadSafe (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection4recvERK11DataContextPv6size_t">tensorrt_llm::executor::kv_cache::Connection::recv (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection4sendERK11DataContextPKv6size_t">tensorrt_llm::executor::kv_cache::Connection::send (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10ConnectionD0Ev">tensorrt_llm::executor::kv_cache::Connection::~Connection (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache18ConnectionInfoTypeE">tensorrt_llm::executor::kv_cache::ConnectionInfoType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManagerE">tensorrt_llm::executor::kv_cache::ConnectionManager (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache17ConnectionManager12getCommStateEv">tensorrt_llm::executor::kv_cache::ConnectionManager::getCommState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManager14getConnectionsERK9CommState">tensorrt_llm::executor::kv_cache::ConnectionManager::getConnections (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManager11recvConnectERK11DataContextPv6size_t">tensorrt_llm::executor::kv_cache::ConnectionManager::recvConnect (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManagerD0Ev">tensorrt_llm::executor::kv_cache::ConnectionManager::~ConnectionManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11DataContextE">tensorrt_llm::executor::kv_cache::DataContext (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11DataContext11DataContextEi">tensorrt_llm::executor::kv_cache::DataContext::DataContext (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache11DataContext6getTagEv">tensorrt_llm::executor::kv_cache::DataContext::getTag (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11DataContext4mTagE">tensorrt_llm::executor::kv_cache::DataContext::mTag (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderE">tensorrt_llm::executor::kv_cache::DynLibLoader (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader5dlSymEPvPKc">tensorrt_llm::executor::kv_cache::DynLibLoader::dlSym (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader12DynLibLoaderERK12DynLibLoader">tensorrt_llm::executor::kv_cache::DynLibLoader::DynLibLoader (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader12DynLibLoaderEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor8kv_cache12DynLibLoader18getFunctionPointerE9FunctionTRKNSt6stringERKNSt6stringE">tensorrt_llm::executor::kv_cache::DynLibLoader::getFunctionPointer (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9getHandleERKNSt6stringE">tensorrt_llm::executor::kv_cache::DynLibLoader::getHandle (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader11getInstanceEv">tensorrt_llm::executor::kv_cache::DynLibLoader::getInstance (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9mDllMutexE">tensorrt_llm::executor::kv_cache::DynLibLoader::mDllMutex (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9mHandlersE">tensorrt_llm::executor::kv_cache::DynLibLoader::mHandlers (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderaSERK12DynLibLoader">tensorrt_llm::executor::kv_cache::DynLibLoader::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderD0Ev">tensorrt_llm::executor::kv_cache::DynLibLoader::~DynLibLoader (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescE">tensorrt_llm::executor::kv_cache::FileDesc (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc2fdE">tensorrt_llm::executor::kv_cache::FileDesc::fd (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERK8FileDesc">tensorrt_llm::executor::kv_cache::FileDesc::FileDesc (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERKNSt6stringEi6mode_t6size_t">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERR8FileDesc">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache8FileDesc5getFdEv">tensorrt_llm::executor::kv_cache::FileDesc::getFd (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache8FileDesc6getLenEv">tensorrt_llm::executor::kv_cache::FileDesc::getLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc4mLenE">tensorrt_llm::executor::kv_cache::FileDesc::mLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescaSERK8FileDesc">tensorrt_llm::executor::kv_cache::FileDesc::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescaSERR8FileDesc">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescD0Ev">tensorrt_llm::executor::kv_cache::FileDesc::~FileDesc (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescsE">tensorrt_llm::executor::kv_cache::FileDescs (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescs9FileDescsERRNSt6vectorI8FileDescEE">tensorrt_llm::executor::kv_cache::FileDescs::FileDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache9FileDescs8getDescsEv">tensorrt_llm::executor::kv_cache::FileDescs::getDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescs6mDescsE">tensorrt_llm::executor::kv_cache::FileDescs::mDescs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IDpEN12tensorrt_llm8executor8kv_cache17makeLoopbackAgentENSt10shared_ptrI17BaseLoopbackAgentEERKNSt6stringEDpRR4Args">tensorrt_llm::executor::kv_cache::makeLoopbackAgent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IDpEN12tensorrt_llm8executor8kv_cache17makeTransferAgentENSt10unique_ptrI17BaseTransferAgentEERKNSt6stringEDpRR4Args">tensorrt_llm::executor::kv_cache::makeTransferAgent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDescE">tensorrt_llm::executor::kv_cache::MemoryDesc (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc11deserializeERNSt7istreamE">tensorrt_llm::executor::kv_cache::MemoryDesc::deserialize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc7getAddrEv">tensorrt_llm::executor::kv_cache::MemoryDesc::getAddr (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc11getDeviceIdEv">tensorrt_llm::executor::kv_cache::MemoryDesc::getDeviceId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc6getLenEv">tensorrt_llm::executor::kv_cache::MemoryDesc::getLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc5mAddrE">tensorrt_llm::executor::kv_cache::MemoryDesc::mAddr (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc9mDeviceIdE">tensorrt_llm::executor::kv_cache::MemoryDesc::mDeviceId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescE9uintptr_t6size_t8uint32_t">tensorrt_llm::executor::kv_cache::MemoryDesc::MemoryDesc (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescEPv6size_t8uint32_t">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescERKNSt6vectorIcEE8uint32_t">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc4mLenE">tensorrt_llm::executor::kv_cache::MemoryDesc::mLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc9serializeERK10MemoryDescRNSt7ostreamE">tensorrt_llm::executor::kv_cache::MemoryDesc::serialize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc14serializedSizeERK10MemoryDesc">tensorrt_llm::executor::kv_cache::MemoryDesc::serializedSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescsE">tensorrt_llm::executor::kv_cache::MemoryDescs (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache11MemoryDescs8getDescsEv">tensorrt_llm::executor::kv_cache::MemoryDescs::getDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache11MemoryDescs7getTypeEv">tensorrt_llm::executor::kv_cache::MemoryDescs::getType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs6mDescsE">tensorrt_llm::executor::kv_cache::MemoryDescs::mDescs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs11MemoryDescsE10MemoryTypeNSt6vectorI10MemoryDescEE">tensorrt_llm::executor::kv_cache::MemoryDescs::MemoryDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs5mTypeE">tensorrt_llm::executor::kv_cache::MemoryDescs::mType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryTypeE">tensorrt_llm::executor::kv_cache::MemoryType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType4kBLKE">tensorrt_llm::executor::kv_cache::MemoryType::kBLK (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kDRAME">tensorrt_llm::executor::kv_cache::MemoryType::kDRAM (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kFILEE">tensorrt_llm::executor::kv_cache::MemoryType::kFILE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType4kOBJE">tensorrt_llm::executor::kv_cache::MemoryType::kOBJ (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kVRAME">tensorrt_llm::executor::kv_cache::MemoryType::kVRAM (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8MpiStateE">tensorrt_llm::executor::kv_cache::MpiState (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache8MpiState6mRanksE">tensorrt_llm::executor::kv_cache::MpiState::mRanks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache8MpiStateeqERK8MpiState">tensorrt_llm::executor::kv_cache::MpiState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache8MpiState8toStringEv">tensorrt_llm::executor::kv_cache::MpiState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache13RegisterDescsE">tensorrt_llm::executor::kv_cache::RegisterDescs (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11SocketStateE">tensorrt_llm::executor::kv_cache::SocketState (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11SocketState3mIpE">tensorrt_llm::executor::kv_cache::SocketState::mIp (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11SocketState5mPortE">tensorrt_llm::executor::kv_cache::SocketState::mPort (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache11SocketStateeqERK11SocketState">tensorrt_llm::executor::kv_cache::SocketState::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache11SocketState8toStringEv">tensorrt_llm::executor::kv_cache::SocketState::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache11SyncMessageE">tensorrt_llm::executor::kv_cache::SyncMessage (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache13TransferDescsE">tensorrt_llm::executor::kv_cache::TransferDescs (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOpE">tensorrt_llm::executor::kv_cache::TransferOp (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOp5kREADE">tensorrt_llm::executor::kv_cache::TransferOp::kREAD (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOp6kWRITEE">tensorrt_llm::executor::kv_cache::TransferOp::kWRITE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequestE">tensorrt_llm::executor::kv_cache::TransferRequest (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest11getDstDescsEv">tensorrt_llm::executor::kv_cache::TransferRequest::getDstDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest5getOpEv">tensorrt_llm::executor::kv_cache::TransferRequest::getOp (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest13getRemoteNameEv">tensorrt_llm::executor::kv_cache::TransferRequest::getRemoteName (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest11getSrcDescsEv">tensorrt_llm::executor::kv_cache::TransferRequest::getSrcDescs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest14getSyncMessageEv">tensorrt_llm::executor::kv_cache::TransferRequest::getSyncMessage (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest9mDstDescsE">tensorrt_llm::executor::kv_cache::TransferRequest::mDstDescs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest3mOpE">tensorrt_llm::executor::kv_cache::TransferRequest::mOp (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest11mRemoteNameE">tensorrt_llm::executor::kv_cache::TransferRequest::mRemoteName (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest9mSrcDescsE">tensorrt_llm::executor::kv_cache::TransferRequest::mSrcDescs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest12mSyncMessageE">tensorrt_llm::executor::kv_cache::TransferRequest::mSyncMessage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest15TransferRequestE10TransferOp13TransferDescs13TransferDescsRKNSt6stringENSt8optionalI11SyncMessageEE">tensorrt_llm::executor::kv_cache::TransferRequest::TransferRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache14TransferStatusE">tensorrt_llm::executor::kv_cache::TransferStatus (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache14TransferStatus11isCompletedEv">tensorrt_llm::executor::kv_cache::TransferStatus::isCompleted (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8kv_cache14TransferStatus4waitEv">tensorrt_llm::executor::kv_cache::TransferStatus::wait (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cache14TransferStatusD0Ev">tensorrt_llm::executor::kv_cache::TransferStatus::~TransferStatus (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfigE">tensorrt_llm::executor::KvCacheConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig34fillEmptyFieldsFromRuntimeDefaultsERKN12tensorrt_llm7runtime15RuntimeDefaultsE">tensorrt_llm::executor::KvCacheConfig::fillEmptyFieldsFromRuntimeDefaults (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig34getAttentionDpEventsGatherPeriodMsEv">tensorrt_llm::executor::KvCacheConfig::getAttentionDpEventsGatherPeriodMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getCopyOnPartialReuseEv">tensorrt_llm::executor::KvCacheConfig::getCopyOnPartialReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig23getCrossKvCacheFractionEv">tensorrt_llm::executor::KvCacheConfig::getCrossKvCacheFraction (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv">tensorrt_llm::executor::KvCacheConfig::getEnableBlockReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getEnablePartialReuseEv">tensorrt_llm::executor::KvCacheConfig::getEnablePartialReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getEventBufferMaxSizeEv">tensorrt_llm::executor::KvCacheConfig::getEventBufferMaxSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv">tensorrt_llm::executor::KvCacheConfig::getFreeGpuMemoryFraction (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::KvCacheConfig::getHostCacheSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getMaxAttentionWindowVecEv">tensorrt_llm::executor::KvCacheConfig::getMaxAttentionWindowVec (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getMaxGpuTotalBytesEv">tensorrt_llm::executor::KvCacheConfig::getMaxGpuTotalBytes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv">tensorrt_llm::executor::KvCacheConfig::getMaxTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv">tensorrt_llm::executor::KvCacheConfig::getOnboardBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig30getSecondaryOffloadMinPriorityEv">tensorrt_llm::executor::KvCacheConfig::getSecondaryOffloadMinPriority (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv">tensorrt_llm::executor::KvCacheConfig::getSinkTokenLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig9getUseUvmEv">tensorrt_llm::executor::KvCacheConfig::getUseUvm (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22kDefaultGpuMemFractionE">tensorrt_llm::executor::KvCacheConfig::kDefaultGpuMemFraction (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI10SizeType32EERKNSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEbRKNSt8optionalI9FloatTypeEENSt8optionalI17RetentionPriorityEE6size_tbbb10SizeType32RKNSt8optionalIN12tensorrt_llm7runtime15RuntimeDefaultsEEERK8uint64_t">tensorrt_llm::executor::KvCacheConfig::KvCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig32mAttentionDpEventsGatherPeriodMsE">tensorrt_llm::executor::KvCacheConfig::mAttentionDpEventsGatherPeriodMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mCopyOnPartialReuseE">tensorrt_llm::executor::KvCacheConfig::mCopyOnPartialReuse (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21mCrossKvCacheFractionE">tensorrt_llm::executor::KvCacheConfig::mCrossKvCacheFraction (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE">tensorrt_llm::executor::KvCacheConfig::mEnableBlockReuse (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mEnablePartialReuseE">tensorrt_llm::executor::KvCacheConfig::mEnablePartialReuse (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mEventBufferMaxSizeE">tensorrt_llm::executor::KvCacheConfig::mEventBufferMaxSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE">tensorrt_llm::executor::KvCacheConfig::mFreeGpuMemoryFraction (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::KvCacheConfig::mHostCacheSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mMaxAttentionWindowVecE">tensorrt_llm::executor::KvCacheConfig::mMaxAttentionWindowVec (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mMaxGpuTotalBytesE">tensorrt_llm::executor::KvCacheConfig::mMaxGpuTotalBytes (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE">tensorrt_llm::executor::KvCacheConfig::mMaxTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE">tensorrt_llm::executor::KvCacheConfig::mOnboardBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig28mSecondaryOffloadMinPriorityE">tensorrt_llm::executor::KvCacheConfig::mSecondaryOffloadMinPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE">tensorrt_llm::executor::KvCacheConfig::mSinkTokenLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig7mUseUvmE">tensorrt_llm::executor::KvCacheConfig::mUseUvm (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig34setAttentionDpEventsGatherPeriodMsE10SizeType32">tensorrt_llm::executor::KvCacheConfig::setAttentionDpEventsGatherPeriodMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setCopyOnPartialReuseEb">tensorrt_llm::executor::KvCacheConfig::setCopyOnPartialReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig23setCrossKvCacheFractionE9FloatType">tensorrt_llm::executor::KvCacheConfig::setCrossKvCacheFraction (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19setEnableBlockReuseEb">tensorrt_llm::executor::KvCacheConfig::setEnableBlockReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setEnablePartialReuseEb">tensorrt_llm::executor::KvCacheConfig::setEnablePartialReuse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setEventBufferMaxSizeE6size_t">tensorrt_llm::executor::KvCacheConfig::setEventBufferMaxSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setFreeGpuMemoryFractionE9FloatType">tensorrt_llm::executor::KvCacheConfig::setFreeGpuMemoryFraction (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setHostCacheSizeE6size_t">tensorrt_llm::executor::KvCacheConfig::setHostCacheSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setMaxAttentionWindowVecENSt6vectorI10SizeType32EE">tensorrt_llm::executor::KvCacheConfig::setMaxAttentionWindowVec (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19setMaxGpuTotalBytesE8uint64_t">tensorrt_llm::executor::KvCacheConfig::setMaxGpuTotalBytes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig12setMaxTokensENSt8optionalI10SizeType32EE">tensorrt_llm::executor::KvCacheConfig::setMaxTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setOnboardBlocksEb">tensorrt_llm::executor::KvCacheConfig::setOnboardBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig30setSecondaryOffloadMinPriorityENSt8optionalI17RetentionPriorityEE">tensorrt_llm::executor::KvCacheConfig::setSecondaryOffloadMinPriority (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig18setSinkTokenLengthE10SizeType32">tensorrt_llm::executor::KvCacheConfig::setSinkTokenLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig9setUseUvmEb">tensorrt_llm::executor::KvCacheConfig::setUseUvm (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheCreatedDataE">tensorrt_llm::executor::KVCacheCreatedData (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheCreatedData22numBlocksPerCacheLevelE">tensorrt_llm::executor::KVCacheCreatedData::numBlocksPerCacheLevel (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEventE">tensorrt_llm::executor::KVCacheEvent (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent15attentionDpRankE">tensorrt_llm::executor::KVCacheEvent::attentionDpRank (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent4dataE">tensorrt_llm::executor::KVCacheEvent::data (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent7eventIdE">tensorrt_llm::executor::KVCacheEvent::eventId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent12KVCacheEventE6IdType16KVCacheEventData10SizeType32NSt8optionalI10SizeType32EE">tensorrt_llm::executor::KVCacheEvent::KVCacheEvent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent10windowSizeE">tensorrt_llm::executor::KVCacheEvent::windowSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDataE">tensorrt_llm::executor::KVCacheEventData (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor16KVCacheEventDiffE">tensorrt_llm::executor::KVCacheEventDiff (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8newValueE">tensorrt_llm::executor::KVCacheEventDiff::newValue (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8oldValueE">tensorrt_llm::executor::KVCacheEventDiff::oldValue (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManagerE">tensorrt_llm::executor::KVCacheEventManager (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager15getLatestEventsENSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::KVCacheEventManager::getLatestEvents (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager19KVCacheEventManagerENSt10shared_ptrIN12tensorrt_llm13batch_manager16kv_cache_manager18BaseKVCacheManagerEEE">tensorrt_llm::executor::KVCacheEventManager::KVCacheEventManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager14kvCacheManagerE">tensorrt_llm::executor::KVCacheEventManager::kvCacheManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheRemovedDataE">tensorrt_llm::executor::KVCacheRemovedData (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheRemovedData11blockHashesE">tensorrt_llm::executor::KVCacheRemovedData::blockHashes (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfigE">tensorrt_llm::executor::KvCacheRetentionConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig19getDecodeDurationMsEv">tensorrt_llm::executor::KvCacheRetentionConfig::getDecodeDurationMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig26getDecodeRetentionPriorityEv">tensorrt_llm::executor::KvCacheRetentionConfig::getDecodeRetentionPriority (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig12getDirectoryEv">tensorrt_llm::executor::KvCacheRetentionConfig::getDirectory (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig36getPerBlockRetentionPriorityDurationE10SizeType3210SizeType32">tensorrt_llm::executor::KvCacheRetentionConfig::getPerBlockRetentionPriorityDuration (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig29getTokenRangeRetentionConfigsEv">tensorrt_llm::executor::KvCacheRetentionConfig::getTokenRangeRetentionConfigs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig15getTransferModeEv">tensorrt_llm::executor::KvCacheRetentionConfig::getTransferMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25kDefaultRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kDefaultRetentionPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMaxRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kMaxRetentionPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMinRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kMinRetentionPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigERKNSt6vectorI25TokenRangeRetentionConfigEE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE19KvCacheTransferModeRKNSt6stringE">tensorrt_llm::executor::KvCacheRetentionConfig::KvCacheRetentionConfig (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig17mDecodeDurationMsE">tensorrt_llm::executor::KvCacheRetentionConfig::mDecodeDurationMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig24mDecodeRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::mDecodeRetentionPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig10mDirectoryE">tensorrt_llm::executor::KvCacheRetentionConfig::mDirectory (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig27mTokenRangeRetentionConfigsE">tensorrt_llm::executor::KvCacheRetentionConfig::mTokenRangeRetentionConfigs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig13mTransferModeE">tensorrt_llm::executor::KvCacheRetentionConfig::mTransferMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfigeqERK22KvCacheRetentionConfig">tensorrt_llm::executor::KvCacheRetentionConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10durationMsE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::durationMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigeqERK25TokenRangeRetentionConfig">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8priorityE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::priority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8tokenEndE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::tokenEnd (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig25TokenRangeRetentionConfigE10SizeType32NSt8optionalI10SizeType32EE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::TokenRangeRetentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10tokenStartE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::tokenStart (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStatsE">tensorrt_llm::executor::KvCacheStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats14allocNewBlocksE">tensorrt_llm::executor::KvCacheStats::allocNewBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats16allocTotalBlocksE">tensorrt_llm::executor::KvCacheStats::allocTotalBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12cacheHitRateE">tensorrt_llm::executor::KvCacheStats::cacheHitRate (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE">tensorrt_llm::executor::KvCacheStats::freeNumBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE">tensorrt_llm::executor::KvCacheStats::maxNumBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12missedBlocksE">tensorrt_llm::executor::KvCacheStats::missedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12reusedBlocksE">tensorrt_llm::executor::KvCacheStats::reusedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE">tensorrt_llm::executor::KvCacheStats::tokensPerBlock (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE">tensorrt_llm::executor::KvCacheStats::usedNumBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockDataE">tensorrt_llm::executor::KVCacheStoredBlockData (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData9blockHashE">tensorrt_llm::executor::KVCacheStoredBlockData::blockHash (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData10cacheLevelE">tensorrt_llm::executor::KVCacheStoredBlockData::cacheLevel (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData22KVCacheStoredBlockDataE6IdTypeN12tensorrt_llm7runtime15VecUniqueTokensENSt8optionalIN12tensorrt_llm7runtime14LoraTaskIdTypeEEE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheStoredBlockData::KVCacheStoredBlockData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6loraIdE">tensorrt_llm::executor::KVCacheStoredBlockData::loraId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData8priorityE">tensorrt_llm::executor::KVCacheStoredBlockData::priority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6tokensE">tensorrt_llm::executor::KVCacheStoredBlockData::tokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredDataE">tensorrt_llm::executor::KVCacheStoredData (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredData6blocksE">tensorrt_llm::executor::KVCacheStoredData::blocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredData10parentHashE">tensorrt_llm::executor::KVCacheStoredData::parentHash (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KvCacheTransferModeE">tensorrt_llm::executor::KvCacheTransferMode (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode4DRAME">tensorrt_llm::executor::KvCacheTransferMode::DRAM (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode3GDSE">tensorrt_llm::executor::KvCacheTransferMode::GDS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode20POSIX_DEBUG_FALLBACKE">tensorrt_llm::executor::KvCacheTransferMode::POSIX_DEBUG_FALLBACK (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedDataE">tensorrt_llm::executor::KVCacheUpdatedData (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData9blockHashE">tensorrt_llm::executor::KVCacheUpdatedData::blockHash (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData10cacheLevelE">tensorrt_llm::executor::KVCacheUpdatedData::cacheLevel (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData17cacheLevelUpdatedE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheUpdatedData::cacheLevelUpdated (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData18KVCacheUpdatedDataE6IdType">tensorrt_llm::executor::KVCacheUpdatedData::KVCacheUpdatedData (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData18KVCacheUpdatedDataE6IdTypeNSt8optionalI16KVCacheEventDiffI10SizeType32EEENSt8optionalI16KVCacheEventDiffI10SizeType32EEE">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData8priorityE">tensorrt_llm::executor::KVCacheUpdatedData::priority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData15priorityUpdatedE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheUpdatedData::priorityUpdated (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE">tensorrt_llm::executor::LogitsPostProcessor (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor26LogitsPostProcessorBatchedE">tensorrt_llm::executor::LogitsPostProcessorBatched (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfigE">tensorrt_llm::executor::LogitsPostProcessorConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig19getProcessorBatchedEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getProcessorBatched (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig15getProcessorMapEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getProcessorMap (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig12getReplicateEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getReplicate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig25LogitsPostProcessorConfigENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI26LogitsPostProcessorBatchedEEb">tensorrt_llm::executor::LogitsPostProcessorConfig::LogitsPostProcessorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig17mProcessorBatchedE">tensorrt_llm::executor::LogitsPostProcessorConfig::mProcessorBatched (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig13mProcessorMapE">tensorrt_llm::executor::LogitsPostProcessorConfig::mProcessorMap (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig10mReplicateE">tensorrt_llm::executor::LogitsPostProcessorConfig::mReplicate (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig19setProcessorBatchedERK26LogitsPostProcessorBatched">tensorrt_llm::executor::LogitsPostProcessorConfig::setProcessorBatched (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig15setProcessorMapERK22LogitsPostProcessorMap">tensorrt_llm::executor::LogitsPostProcessorConfig::setProcessorMap (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig12setReplicateEb">tensorrt_llm::executor::LogitsPostProcessorConfig::setReplicate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE">tensorrt_llm::executor::LogitsPostProcessorMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfigE">tensorrt_llm::executor::LookaheadDecodingConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig28calculateSpeculativeResourceEv">tensorrt_llm::executor::LookaheadDecodingConfig::calculateSpeculativeResource (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig33calculateSpeculativeResourceTupleE10SizeType3210SizeType3210SizeType32">tensorrt_llm::executor::LookaheadDecodingConfig::calculateSpeculativeResourceTuple (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig3getEv">tensorrt_llm::executor::LookaheadDecodingConfig::get (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig12getNgramSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig22getVerificationSetSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getVerificationSetSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig13getWindowSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getWindowSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig4isLEERK23LookaheadDecodingConfig">tensorrt_llm::executor::LookaheadDecodingConfig::isLE (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig7isLegalE10SizeType3210SizeType3210SizeType32">tensorrt_llm::executor::LookaheadDecodingConfig::isLegal (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig30kDefaultLookaheadDecodingNgramE">tensorrt_llm::executor::LookaheadDecodingConfig::kDefaultLookaheadDecodingNgram (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig40kDefaultLookaheadDecodingVerificationSetE">tensorrt_llm::executor::LookaheadDecodingConfig::kDefaultLookaheadDecodingVerificationSet (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig31kDefaultLookaheadDecodingWindowE">tensorrt_llm::executor::LookaheadDecodingConfig::kDefaultLookaheadDecodingWindow (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigE10SizeType3210SizeType3210SizeType32">tensorrt_llm::executor::LookaheadDecodingConfig::LookaheadDecodingConfig (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig10mNgramSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mNgramSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig20mVerificationSetSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mVerificationSetSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig11mWindowSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mWindowSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfigeqERK23LookaheadDecodingConfig">tensorrt_llm::executor::LookaheadDecodingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfigE">tensorrt_llm::executor::LoraConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv">tensorrt_llm::executor::LoraConfig::getConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv">tensorrt_llm::executor::LoraConfig::getTaskId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv">tensorrt_llm::executor::LoraConfig::getWeights (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE">tensorrt_llm::executor::LoraConfig::LoraConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE">tensorrt_llm::executor::LoraConfig::mConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE">tensorrt_llm::executor::LoraConfig::mTaskId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE">tensorrt_llm::executor::LoraConfig::mWeights (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13MedusaChoicesE">tensorrt_llm::executor::MedusaChoices (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryTypeE">tensorrt_llm::executor::MemoryType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE">tensorrt_llm::executor::MemoryType::kCPU (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE">tensorrt_llm::executor::MemoryType::kCPU_PINNED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType15kCPU_PINNEDPOOLE">tensorrt_llm::executor::MemoryType::kCPU_PINNEDPOOL (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE">tensorrt_llm::executor::MemoryType::kGPU (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE">tensorrt_llm::executor::MemoryType::kUNKNOWN (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME">tensorrt_llm::executor::MemoryType::kUVM (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16MillisecondsTypeE">tensorrt_llm::executor::MillisecondsType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelTypeE">tensorrt_llm::executor::ModelType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE">tensorrt_llm::executor::ModelType::kDECODER_ONLY (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType16kENCODER_DECODERE">tensorrt_llm::executor::ModelType::kENCODER_DECODER (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType13kENCODER_ONLYE">tensorrt_llm::executor::ModelType::kENCODER_ONLY (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfigE">tensorrt_llm::executor::MropeConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11MropeConfig22getMRopePositionDeltasEv">tensorrt_llm::executor::MropeConfig::getMRopePositionDeltas (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11MropeConfig20getMRopeRotaryCosSinEv">tensorrt_llm::executor::MropeConfig::getMRopeRotaryCosSin (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig20mMRopePositionDeltasE">tensorrt_llm::executor::MropeConfig::mMRopePositionDeltas (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig18mMRopeRotaryCosSinE">tensorrt_llm::executor::MropeConfig::mMRopeRotaryCosSin (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig11MropeConfigE6Tensor10SizeType32">tensorrt_llm::executor::MropeConfig::MropeConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15MultimodalInputE">tensorrt_llm::executor::MultimodalInput (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15MultimodalInput19getMultimodalHashesEv">tensorrt_llm::executor::MultimodalInput::getMultimodalHashes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15MultimodalInput20getMultimodalLengthsEv">tensorrt_llm::executor::MultimodalInput::getMultimodalLengths (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15MultimodalInput22getMultimodalPositionsEv">tensorrt_llm::executor::MultimodalInput::getMultimodalPositions (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15MultimodalInput17mMultimodalHashesE">tensorrt_llm::executor::MultimodalInput::mMultimodalHashes (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15MultimodalInput18mMultimodalLengthsE">tensorrt_llm::executor::MultimodalInput::mMultimodalLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15MultimodalInput20mMultimodalPositionsE">tensorrt_llm::executor::MultimodalInput::mMultimodalPositions (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15MultimodalInput15MultimodalInputENSt6vectorINSt6vectorI10SizeType32EEEENSt6vectorI10SizeType32EENSt6vectorI10SizeType32EE">tensorrt_llm::executor::MultimodalInput::MultimodalInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE21ContextChunkingPolicy">tensorrt_llm::executor::operator<< (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE23CapacitySchedulerPolicy">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfigE">tensorrt_llm::executor::OrchestratorConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getIsOrchestratorEv">tensorrt_llm::executor::OrchestratorConfig::getIsOrchestrator (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getOrchLeaderCommEv">tensorrt_llm::executor::OrchestratorConfig::getOrchLeaderComm (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getSpawnProcessesEv">tensorrt_llm::executor::OrchestratorConfig::getSpawnProcesses (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig23getWorkerExecutablePathEv">tensorrt_llm::executor::OrchestratorConfig::getWorkerExecutablePath (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mIsOrchestratorE">tensorrt_llm::executor::OrchestratorConfig::mIsOrchestrator (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mOrchLeaderCommE">tensorrt_llm::executor::OrchestratorConfig::mOrchLeaderComm (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mSpawnProcessesE">tensorrt_llm::executor::OrchestratorConfig::mSpawnProcesses (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig21mWorkerExecutablePathE">tensorrt_llm::executor::OrchestratorConfig::mWorkerExecutablePath (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig18OrchestratorConfigEbNSt6stringENSt10shared_ptrIN3mpi7MpiCommEEEb">tensorrt_llm::executor::OrchestratorConfig::OrchestratorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setIsOrchestratorEb">tensorrt_llm::executor::OrchestratorConfig::setIsOrchestrator (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setOrchLeaderCommERKNSt10shared_ptrIN3mpi7MpiCommEEE">tensorrt_llm::executor::OrchestratorConfig::setOrchLeaderComm (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setSpawnProcessesEb">tensorrt_llm::executor::OrchestratorConfig::setSpawnProcesses (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig23setWorkerExecutablePathERKNSt6stringE">tensorrt_llm::executor::OrchestratorConfig::setWorkerExecutablePath (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfigE">tensorrt_llm::executor::OutputConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22additionalModelOutputsE">tensorrt_llm::executor::OutputConfig::additionalModelOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE">tensorrt_llm::executor::OutputConfig::excludeInputFromOutput (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbbbbNSt8optionalINSt6vectorI21AdditionalModelOutputEEEE">tensorrt_llm::executor::OutputConfig::OutputConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE">tensorrt_llm::executor::OutputConfig::returnContextLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig19returnEncoderOutputE">tensorrt_llm::executor::OutputConfig::returnEncoderOutput (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE">tensorrt_llm::executor::OutputConfig::returnGenerationLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE">tensorrt_llm::executor::OutputConfig::returnLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig17returnPerfMetricsE">tensorrt_llm::executor::OutputConfig::returnPerfMetrics (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfigE">tensorrt_llm::executor::ParallelConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv">tensorrt_llm::executor::ParallelConfig::getDeviceIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig11getNumNodesEv">tensorrt_llm::executor::ParallelConfig::getNumNodes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig21getOrchestratorConfigEv">tensorrt_llm::executor::ParallelConfig::getOrchestratorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv">tensorrt_llm::executor::ParallelConfig::getParticipantIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE">tensorrt_llm::executor::ParallelConfig::mCommMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE">tensorrt_llm::executor::ParallelConfig::mCommType (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE">tensorrt_llm::executor::ParallelConfig::mDeviceIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mNumNodesE">tensorrt_llm::executor::ParallelConfig::mNumNodes (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig19mOrchestratorConfigE">tensorrt_llm::executor::ParallelConfig::mOrchestratorConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE">tensorrt_llm::executor::ParallelConfig::mParticipantIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI18OrchestratorConfigEENSt8optionalI10SizeType32EE">tensorrt_llm::executor::ParallelConfig::ParallelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode">tensorrt_llm::executor::ParallelConfig::setCommunicationMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType">tensorrt_llm::executor::ParallelConfig::setCommunicationType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::ParallelConfig::setDeviceIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig11setNumNodesE10SizeType32">tensorrt_llm::executor::ParallelConfig::setNumNodes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig21setOrchestratorConfigERK18OrchestratorConfig">tensorrt_llm::executor::ParallelConfig::setOrchestratorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::ParallelConfig::setParticipantIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE">tensorrt_llm::executor::PeftCacheConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv">tensorrt_llm::executor::PeftCacheConfig::getDeviceCachePercent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::PeftCacheConfig::getHostCacheSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig18getLoraPrefetchDirEv">tensorrt_llm::executor::PeftCacheConfig::getLoraPrefetchDir (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getMaxAdapterSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockDevice (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockHost (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv">tensorrt_llm::executor::PeftCacheConfig::getNumCopyStreams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumDeviceModuleLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumEnsureWorkers (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumHostModuleLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumPutWorkers (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getOptimalAdapterSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig22kDefaultMaxAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxAdapterSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig30kDefaultMaxPagesPerBlockDeviceE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxPagesPerBlockDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig28kDefaultMaxPagesPerBlockHostE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxPagesPerBlockHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig26kDefaultOptimalAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::kDefaultOptimalAdapterSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE">tensorrt_llm::executor::PeftCacheConfig::mDeviceCachePercent (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::PeftCacheConfig::mHostCacheSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig16mLoraPrefetchDirE">tensorrt_llm::executor::PeftCacheConfig::mLoraPrefetchDir (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mMaxAdapterSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE">tensorrt_llm::executor::PeftCacheConfig::mNumCopyStreams (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumDeviceModuleLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumEnsureWorkers (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumHostModuleLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumPutWorkers (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mOptimalAdapterSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfigeqERK15PeftCacheConfig">tensorrt_llm::executor::PeftCacheConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt8optionalIfEERKNSt8optionalI6size_tEERKNSt8optionalINSt6stringEEE">tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12PriorityTypeE">tensorrt_llm::executor::PriorityType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE">tensorrt_llm::executor::PromptTuningConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv">tensorrt_llm::executor::PromptTuningConfig::getEmbeddingTable (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig21getInputTokenExtraIdsEv">tensorrt_llm::executor::PromptTuningConfig::getInputTokenExtraIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE">tensorrt_llm::executor::PromptTuningConfig::mEmbeddingTable (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig19mInputTokenExtraIdsE">tensorrt_llm::executor::PromptTuningConfig::mInputTokenExtraIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6TensorNSt8optionalI16VecTokenExtraIdsEE">tensorrt_llm::executor::PromptTuningConfig::PromptTuningConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE">tensorrt_llm::executor::RandomSeedType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestE">tensorrt_llm::executor::Request (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request24getAdditionalOutputNamesEv">tensorrt_llm::executor::Request::getAdditionalOutputNames (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request17getAllottedTimeMsEv">tensorrt_llm::executor::Request::getAllottedTimeMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv">tensorrt_llm::executor::Request::getBadWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getCacheSaltIDEv">tensorrt_llm::executor::Request::getCacheSaltID (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getClientIdEv">tensorrt_llm::executor::Request::getClientId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getContextPhaseParamsEv">tensorrt_llm::executor::Request::getContextPhaseParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getCrossAttentionMaskEv">tensorrt_llm::executor::Request::getCrossAttentionMask (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getEagleConfigEv">tensorrt_llm::executor::Request::getEagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv">tensorrt_llm::executor::Request::getEmbeddingBias (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputFeaturesEv">tensorrt_llm::executor::Request::getEncoderInputFeatures (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputTokenIdsEv">tensorrt_llm::executor::Request::getEncoderInputTokenIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getEncoderOutputLengthEv">tensorrt_llm::executor::Request::getEncoderOutputLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv">tensorrt_llm::executor::Request::getEndId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request28getExternalDraftTokensConfigEv">tensorrt_llm::executor::Request::getExternalDraftTokensConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getGuidedDecodingParamsEv">tensorrt_llm::executor::Request::getGuidedDecodingParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv">tensorrt_llm::executor::Request::getInputTokenIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request25getKvCacheRetentionConfigEv">tensorrt_llm::executor::Request::getKvCacheRetentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getLanguageAdapterUidEv">tensorrt_llm::executor::Request::getLanguageAdapterUid (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getLogitsPostProcessorEv">tensorrt_llm::executor::Request::getLogitsPostProcessor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv">tensorrt_llm::executor::Request::getLogitsPostProcessorName (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request18getLookaheadConfigEv">tensorrt_llm::executor::Request::getLookaheadConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv">tensorrt_llm::executor::Request::getLoraConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getMaxTokensEv">tensorrt_llm::executor::Request::getMaxTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getMropeConfigEv">tensorrt_llm::executor::Request::getMropeConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getMultimodalEmbeddingEv">tensorrt_llm::executor::Request::getMultimodalEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request18getMultimodalInputEv">tensorrt_llm::executor::Request::getMultimodalInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv">tensorrt_llm::executor::Request::getOutputConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv">tensorrt_llm::executor::Request::getPadId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getPositionIdsEv">tensorrt_llm::executor::Request::getPositionIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getPriorityEv">tensorrt_llm::executor::Request::getPriority (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv">tensorrt_llm::executor::Request::getPromptTuningConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getRequestTypeEv">tensorrt_llm::executor::Request::getRequestType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request27getReturnAllGeneratedTokensEv">tensorrt_llm::executor::Request::getReturnAllGeneratedTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv">tensorrt_llm::executor::Request::getSamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getSkipCrossAttnBlocksEv">tensorrt_llm::executor::Request::getSkipCrossAttnBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv">tensorrt_llm::executor::Request::getStopWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv">tensorrt_llm::executor::Request::getStreaming (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request25kBatchedPostProcessorNameE">tensorrt_llm::executor::Request::kBatchedPostProcessorName (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request16kDefaultPriorityE">tensorrt_llm::executor::Request::kDefaultPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request31kDynamicPostProcessorNamePrefixE">tensorrt_llm::executor::Request::kDynamicPostProcessorNamePrefix (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request5mImplE">tensorrt_llm::executor::Request::mImpl (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request">tensorrt_llm::executor::Request::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens10SizeType32bRK14SamplingConfigRK12OutputConfigRKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25ExternalDraftTokensConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI15MultimodalInputEENSt8optionalI6TensorEENSt8optionalI11MropeConfigEENSt8optionalI10LoraConfigEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI22KvCacheRetentionConfigEENSt8optionalINSt6stringEEENSt8optionalI19LogitsPostProcessorEENSt8optionalI9VecTokensEENSt8optionalI6IdTypeEEb12PriorityType11RequestTypeNSt8optionalI18ContextPhaseParamsEENSt8optionalI6TensorEENSt8optionalI10SizeType32EENSt8optionalI6TensorEE10SizeType32NSt8optionalI11EagleConfigEENSt8optionalI6TensorEENSt8optionalI20GuidedDecodingParamsEENSt8optionalI10SizeType32EENSt8optionalI16MillisecondsTypeEENSt8optionalI15CacheSaltIDTypeEE">tensorrt_llm::executor::Request::Request (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request17setAllottedTimeMsE16MillisecondsType">tensorrt_llm::executor::Request::setAllottedTimeMs (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setBadWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setCacheSaltIDE15CacheSaltIDType">tensorrt_llm::executor::Request::setCacheSaltID (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setClientIdE6IdType">tensorrt_llm::executor::Request::setClientId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setContextPhaseParamsE18ContextPhaseParams">tensorrt_llm::executor::Request::setContextPhaseParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setCrossAttentionMaskE6Tensor">tensorrt_llm::executor::Request::setCrossAttentionMask (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setEagleConfigERKNSt8optionalI11EagleConfigEE">tensorrt_llm::executor::Request::setEagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor">tensorrt_llm::executor::Request::setEmbeddingBias (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputFeaturesE6Tensor">tensorrt_llm::executor::Request::setEncoderInputFeatures (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputTokenIdsERK9VecTokens">tensorrt_llm::executor::Request::setEncoderInputTokenIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setEncoderOutputLengthE10SizeType32">tensorrt_llm::executor::Request::setEncoderOutputLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setEndIdE10SizeType32">tensorrt_llm::executor::Request::setEndId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request28setExternalDraftTokensConfigERK25ExternalDraftTokensConfig">tensorrt_llm::executor::Request::setExternalDraftTokensConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setGuidedDecodingParamsERK20GuidedDecodingParams">tensorrt_llm::executor::Request::setGuidedDecodingParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request25setKvCacheRetentionConfigERK22KvCacheRetentionConfig">tensorrt_llm::executor::Request::setKvCacheRetentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setLanguageAdapterUidE10SizeType32">tensorrt_llm::executor::Request::setLanguageAdapterUid (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setLogitsPostProcessorERKNSt8optionalI19LogitsPostProcessorEE">tensorrt_llm::executor::Request::setLogitsPostProcessor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE">tensorrt_llm::executor::Request::setLogitsPostProcessorName (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request18setLookaheadConfigERK23LookaheadDecodingConfig">tensorrt_llm::executor::Request::setLookaheadConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig">tensorrt_llm::executor::Request::setLoraConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setMropeConfigERK11MropeConfig">tensorrt_llm::executor::Request::setMropeConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setMultimodalEmbeddingERK6Tensor">tensorrt_llm::executor::Request::setMultimodalEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request18setMultimodalInputERK15MultimodalInput">tensorrt_llm::executor::Request::setMultimodalInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig">tensorrt_llm::executor::Request::setOutputConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setPadIdE10SizeType32">tensorrt_llm::executor::Request::setPadId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setPositionIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::Request::setPositionIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setPriorityE12PriorityType">tensorrt_llm::executor::Request::setPriority (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig">tensorrt_llm::executor::Request::setPromptTuningConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setRequestTypeERK11RequestType">tensorrt_llm::executor::Request::setRequestType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request27setReturnAllGeneratedTokensEb">tensorrt_llm::executor::Request::setReturnAllGeneratedTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig">tensorrt_llm::executor::Request::setSamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setSkipCrossAttnBlocksE6Tensor">tensorrt_llm::executor::Request::setSkipCrossAttnBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setStopWords (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb">tensorrt_llm::executor::Request::setStreaming (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestD0Ev">tensorrt_llm::executor::Request::~Request (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetricsE">tensorrt_llm::executor::RequestPerfMetrics (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9firstIterE">tensorrt_llm::executor::RequestPerfMetrics::firstIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics4iterE">tensorrt_llm::executor::RequestPerfMetrics::iter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14kvCacheMetricsE">tensorrt_llm::executor::RequestPerfMetrics::kvCacheMetrics (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetricsE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics14kvCacheHitRateE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::kvCacheHitRate (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numMissedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numMissedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics21numNewAllocatedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numNewAllocatedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numReusedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numReusedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics23numTotalAllocatedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numTotalAllocatedBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics8lastIterE">tensorrt_llm::executor::RequestPerfMetrics::lastIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics19speculativeDecodingE">tensorrt_llm::executor::RequestPerfMetrics::speculativeDecoding (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetricsE">tensorrt_llm::executor::RequestPerfMetrics::SpeculativeDecodingMetrics (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics14acceptanceRateE">tensorrt_llm::executor::RequestPerfMetrics::SpeculativeDecodingMetrics::acceptanceRate (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics24totalAcceptedDraftTokensE">tensorrt_llm::executor::RequestPerfMetrics::SpeculativeDecodingMetrics::totalAcceptedDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics16totalDraftTokensE">tensorrt_llm::executor::RequestPerfMetrics::SpeculativeDecodingMetrics::totalDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9TimePointE">tensorrt_llm::executor::RequestPerfMetrics::TimePoint (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13timingMetricsE">tensorrt_llm::executor::RequestPerfMetrics::timingMetrics (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetricsE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics11arrivalTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::arrivalTime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18firstScheduledTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::firstScheduledTime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics14firstTokenTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::firstTokenTime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics11kvCacheSizeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::kvCacheSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18kvCacheTransferEndE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::kvCacheTransferEnd (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics20kvCacheTransferStartE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::kvCacheTransferStart (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics13lastTokenTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::lastTokenTime (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStageE">tensorrt_llm::executor::RequestStage (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kCONTEXT_IN_PROGRESS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kENCODER_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kENCODER_IN_PROGRESS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE">tensorrt_llm::executor::RequestStage::kGENERATION_COMPLETE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kGENERATION_IN_PROGRESS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE">tensorrt_llm::executor::RequestStage::kQUEUED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStatsE">tensorrt_llm::executor::RequestStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats24allocNewBlocksPerRequestE">tensorrt_llm::executor::RequestStats::allocNewBlocksPerRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats26allocTotalBlocksPerRequestE">tensorrt_llm::executor::RequestStats::allocTotalBlocksPerRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats26avgNumDecodedTokensPerIterE">tensorrt_llm::executor::RequestStats::avgNumDecodedTokensPerIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE">tensorrt_llm::executor::RequestStats::contextPrefillPosition (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats15disServingStatsE">tensorrt_llm::executor::RequestStats::disServingStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats2idE">tensorrt_llm::executor::RequestStats::id (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats24kvCacheHitRatePerRequestE">tensorrt_llm::executor::RequestStats::kvCacheHitRatePerRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22missedBlocksPerRequestE">tensorrt_llm::executor::RequestStats::missedBlocksPerRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE">tensorrt_llm::executor::RequestStats::numGeneratedTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE">tensorrt_llm::executor::RequestStats::paused (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22reusedBlocksPerRequestE">tensorrt_llm::executor::RequestStats::reusedBlocksPerRequest (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE">tensorrt_llm::executor::RequestStats::scheduled (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats5stageE">tensorrt_llm::executor::RequestStats::stage (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE">tensorrt_llm::executor::RequestStatsPerIteration (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE">tensorrt_llm::executor::RequestStatsPerIteration::iter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE">tensorrt_llm::executor::RequestStatsPerIteration::requestStats (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestTypeE">tensorrt_llm::executor::RequestType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType35REQUEST_TYPE_CONTEXT_AND_GENERATIONE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_CONTEXT_AND_GENERATION (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType25REQUEST_TYPE_CONTEXT_ONLYE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_CONTEXT_ONLY (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType28REQUEST_TYPE_GENERATION_ONLYE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_GENERATION_ONLY (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseE">tensorrt_llm::executor::Response (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response11getClientIdEv">tensorrt_llm::executor::Response::getClientId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv">tensorrt_llm::executor::Response::getErrorMsg (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv">tensorrt_llm::executor::Response::getRequestId (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response9getResultEv">tensorrt_llm::executor::Response::getResult (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv">tensorrt_llm::executor::Response::hasError (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response5mImplE">tensorrt_llm::executor::Response::mImpl (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response">tensorrt_llm::executor::Response::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6ResultNSt8optionalI6IdTypeEE">tensorrt_llm::executor::Response::Response (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringENSt8optionalI6IdTypeEE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response">[3]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseD0Ev">tensorrt_llm::executor::Response::~Response (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6ResultE">tensorrt_llm::executor::Result (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result17additionalOutputsE">tensorrt_llm::executor::Result::additionalOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result23avgDecodedTokensPerIterE">tensorrt_llm::executor::Result::avgDecodedTokensPerIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE">tensorrt_llm::executor::Result::contextLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result18contextPhaseParamsE">tensorrt_llm::executor::Result::contextPhaseParams (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE">tensorrt_llm::executor::Result::cumLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result12decodingIterE">tensorrt_llm::executor::Result::decodingIter (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13encoderOutputE">tensorrt_llm::executor::Result::encoderOutput (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13finishReasonsE">tensorrt_llm::executor::Result::finishReasons (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE">tensorrt_llm::executor::Result::generationLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result7isFinalE">tensorrt_llm::executor::Result::isFinal (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result15isSequenceFinalE">tensorrt_llm::executor::Result::isSequenceFinal (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result8logProbsE">tensorrt_llm::executor::Result::logProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE">tensorrt_llm::executor::Result::outputTokenIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result18requestPerfMetricsE">tensorrt_llm::executor::Result::requestPerfMetrics (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13sequenceIndexE">tensorrt_llm::executor::Result::sequenceIndex (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result21specDecFastLogitsInfoE">tensorrt_llm::executor::Result::specDecFastLogitsInfo (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17RetentionPriorityE">tensorrt_llm::executor::RetentionPriority (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDurationE">tensorrt_llm::executor::RetentionPriorityAndDuration (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration10durationMsE">tensorrt_llm::executor::RetentionPriorityAndDuration::durationMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration17retentionPriorityE">tensorrt_llm::executor::RetentionPriorityAndDuration::retentionPriority (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration28RetentionPriorityAndDurationERKNSt8optionalI17RetentionPriorityEERKNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::RetentionPriorityAndDuration::RetentionPriorityAndDuration (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfigE">tensorrt_llm::executor::SamplingConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig28checkBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkBeamSearchDiversityRate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkBeamWidthE10SizeType32">tensorrt_llm::executor::SamplingConfig::checkBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19checkBeamWidthArrayERKNSt8optionalINSt6vectorI10SizeType32EEEEK10SizeType32">tensorrt_llm::executor::SamplingConfig::checkBeamWidthArray (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18checkEarlyStoppingERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkEarlyStopping (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18checkLengthPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkLengthPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkMinPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkMinP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkMinTokensERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkMinTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkNoRepeatNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig23checkNumReturnSequencesERKNSt8optionalI10SizeType32EE10SizeType32">tensorrt_llm::executor::SamplingConfig::checkNumReturnSequences (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig23checkPromptIgnoreLengthERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkPromptIgnoreLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkRepetitionPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkRepetitionPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16checkTemperatureERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTemperature (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopKERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkTopPDecayERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPDecay (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12checkTopPMinERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPMin (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17checkTopPResetIdsERKNSt8optionalI11TokenIdTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPResetIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv">tensorrt_llm::executor::SamplingConfig::getBeamSearchDiversityRate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv">tensorrt_llm::executor::SamplingConfig::getBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig17getBeamWidthArrayEv">tensorrt_llm::executor::SamplingConfig::getBeamWidthArray (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv">tensorrt_llm::executor::SamplingConfig::getEarlyStopping (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv">tensorrt_llm::executor::SamplingConfig::getFrequencyPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv">tensorrt_llm::executor::SamplingConfig::getLengthPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getMinPEv">tensorrt_llm::executor::SamplingConfig::getMinP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinTokensEv">tensorrt_llm::executor::SamplingConfig::getMinTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getNoRepeatNgramSizeEv">tensorrt_llm::executor::SamplingConfig::getNoRepeatNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig17getNumReturnBeamsEv">tensorrt_llm::executor::SamplingConfig::getNumReturnBeams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig21getNumReturnSequencesEv">tensorrt_llm::executor::SamplingConfig::getNumReturnSequences (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv">tensorrt_llm::executor::SamplingConfig::getPresencePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig21getPromptIgnoreLengthEv">tensorrt_llm::executor::SamplingConfig::getPromptIgnoreLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv">tensorrt_llm::executor::SamplingConfig::getRepetitionPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getSeedEv">tensorrt_llm::executor::SamplingConfig::getSeed (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv">tensorrt_llm::executor::SamplingConfig::getTemperature (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv">tensorrt_llm::executor::SamplingConfig::getTopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv">tensorrt_llm::executor::SamplingConfig::getTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv">tensorrt_llm::executor::SamplingConfig::getTopPDecay (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv">tensorrt_llm::executor::SamplingConfig::getTopPMin (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv">tensorrt_llm::executor::SamplingConfig::getTopPResetIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE">tensorrt_llm::executor::SamplingConfig::mBeamSearchDiversityRate (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE">tensorrt_llm::executor::SamplingConfig::mBeamWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig15mBeamWidthArrayE">tensorrt_llm::executor::SamplingConfig::mBeamWidthArray (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE">tensorrt_llm::executor::SamplingConfig::mEarlyStopping (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE">tensorrt_llm::executor::SamplingConfig::mFrequencyPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE">tensorrt_llm::executor::SamplingConfig::mLengthPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mMinPE">tensorrt_llm::executor::SamplingConfig::mMinP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinTokensE">tensorrt_llm::executor::SamplingConfig::mMinTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18mNoRepeatNgramSizeE">tensorrt_llm::executor::SamplingConfig::mNoRepeatNgramSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig15mNumReturnBeamsE">tensorrt_llm::executor::SamplingConfig::mNumReturnBeams (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19mNumReturnSequencesE">tensorrt_llm::executor::SamplingConfig::mNumReturnSequences (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE">tensorrt_llm::executor::SamplingConfig::mPresencePenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19mPromptIgnoreLengthE">tensorrt_llm::executor::SamplingConfig::mPromptIgnoreLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE">tensorrt_llm::executor::SamplingConfig::mRepetitionPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mSeedE">tensorrt_llm::executor::SamplingConfig::mSeed (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE">tensorrt_llm::executor::SamplingConfig::mTemperature (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE">tensorrt_llm::executor::SamplingConfig::mTopK (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE">tensorrt_llm::executor::SamplingConfig::mTopP (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE">tensorrt_llm::executor::SamplingConfig::mTopPDecay (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE">tensorrt_llm::executor::SamplingConfig::mTopPMin (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE">tensorrt_llm::executor::SamplingConfig::mTopPResetIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::executor::SamplingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE10SizeType32RKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI11TokenIdTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalINSt6vectorI10SizeType32EEEE">tensorrt_llm::executor::SamplingConfig::SamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig26setBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setBeamSearchDiversityRate (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setBeamWidthE10SizeType32">tensorrt_llm::executor::SamplingConfig::setBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17setBeamWidthArrayERKNSt8optionalINSt6vectorI10SizeType32EEEE">tensorrt_llm::executor::SamplingConfig::setBeamWidthArray (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16setEarlyStoppingERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setEarlyStopping (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19setFrequencyPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setFrequencyPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16setLengthPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setLengthPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setMinPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setMinP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setMinTokensERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setMinTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20setNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setNoRepeatNgramSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig21setNumReturnSequencesERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setNumReturnSequences (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18setPresencePenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setPresencePenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig21setPromptIgnoreLengthERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setPromptIgnoreLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20setRepetitionPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setRepetitionPenalty (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setSeedERKNSt8optionalI14RandomSeedTypeEE">tensorrt_llm::executor::SamplingConfig::setSeed (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14setTemperatureERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTemperature (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopKERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setTopK (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopP (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setTopPDecayERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPDecay (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10setTopPMinERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPMin (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig15setTopPResetIdsERKNSt8optionalI11TokenIdTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPResetIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20updateNumReturnBeamsEv">tensorrt_llm::executor::SamplingConfig::updateNumReturnBeams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfigE">tensorrt_llm::executor::SchedulerConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig26getCapacitySchedulerPolicyEv">tensorrt_llm::executor::SchedulerConfig::getCapacitySchedulerPolicy (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig24getContextChunkingPolicyEv">tensorrt_llm::executor::SchedulerConfig::getContextChunkingPolicy (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig21getDynamicBatchConfigEv">tensorrt_llm::executor::SchedulerConfig::getDynamicBatchConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig24mCapacitySchedulerPolicyE">tensorrt_llm::executor::SchedulerConfig::mCapacitySchedulerPolicy (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig22mContextChunkingPolicyE">tensorrt_llm::executor::SchedulerConfig::mContextChunkingPolicy (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig19mDynamicBatchConfigE">tensorrt_llm::executor::SchedulerConfig::mDynamicBatchConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfigeqERK15SchedulerConfig">tensorrt_llm::executor::SchedulerConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE23CapacitySchedulerPolicyNSt8optionalI21ContextChunkingPolicyEENSt8optionalI18DynamicBatchConfigEE">tensorrt_llm::executor::SchedulerConfig::SchedulerConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13SerializationE">tensorrt_llm::executor::Serialization (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization32deserializeAdditionalModelOutputERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeAdditionalModelOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization27deserializeAdditionalOutputERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeAdditionalOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization21deserializeAgentStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeAgentState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization19deserializeBlockKeyERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeBlockKey (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization15deserializeBoolERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeBool (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization21deserializeCacheStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeCacheState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeCacheTransceiverConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeCacheTransceiverConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeCommStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeCommState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeContextPhaseParamsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeContextPhaseParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeDataTransceiverStateERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeDataTransceiverState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeDataTransceiverStateERNSt7istreamE">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeDebugConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDebugConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeDecodingModeERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeDisServingRequestStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDisServingRequestStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeDynamicBatchConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDynamicBatchConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeEagleConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeEagleConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeExecutorConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExecutorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization40deserializeExtendedRuntimePerfKnobConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeExternalDraftTokensConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExternalDraftTokensConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeGuidedDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingParamsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeGuidedDecodingParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization32deserializeInflightBatchingStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeInflightBatchingStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeIterationStats (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt7istreamE">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization28deserializeIterationStatsVecERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeIterationStatsVec (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization24deserializeKvCacheConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheCreatedDataERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheCreatedData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeKVCacheEventERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheEvent (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor13Serialization27deserializeKVCacheEventDiffE16KVCacheEventDiffI1TERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheEventDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization24deserializeKVCacheEventsERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeKVCacheEvents (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheRemovedDataERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheRemovedData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeKvCacheRetentionConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheRetentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeKvCacheStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeKVCacheStoredBlockDataERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheStoredBlockData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization28deserializeKVCacheStoredDataERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheStoredData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheUpdatedDataERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKVCacheUpdatedData (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization34deserializeLookaheadDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeLookaheadDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization21deserializeLoraConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeLoraConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeModelTypeERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeModelType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeMropeConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeMropeConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization26deserializeMultimodalInputERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeMultimodalInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeOrchestratorConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeOrchestratorConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeOutputConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeOutputConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeParallelConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeParallelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization26deserializePeftCacheConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializePeftCacheConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializePromptTuningConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializePromptTuningConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization18deserializeRequestERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequest (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeRequestPerfMetricsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestPerfMetrics (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStageERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestStage (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeRequestStatsPerIteration (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt7istreamE">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization38deserializeRequestStatsPerIterationVecERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeRequestStatsPerIterationVec (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization19deserializeResponseERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeResponse (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeResponsesERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeResponses (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeResultERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeResult (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeSamplingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization26deserializeSchedulerConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSchedulerConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeSocketStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSocketState (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization32deserializeSpecDecFastLogitsInfoERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSpecDecFastLogitsInfo (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization28deserializeSpecDecodingStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSpecDecodingStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeSpeculativeDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSpeculativeDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization30deserializeStaticBatchingStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeStaticBatchingStats (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeStringERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeString (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeTensorERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeTimePointERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTimePoint (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeTokenRangeRetentionConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTokenRangeRetentionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeUniqueTokenERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeUniqueToken (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor13Serialization9serializeEvRK16KVCacheEventDiffI1TERNSt7ostreamE">tensorrt_llm::executor::Serialization::serialize (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK10LoraConfigRNSt7ostreamE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11DebugConfigRNSt7ostreamE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11EagleConfigRNSt7ostreamE">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11MropeConfigRNSt7ostreamE">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12DecodingModeRNSt7ostreamE">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12KVCacheEventRNSt7ostreamE">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12KvCacheStatsRNSt7ostreamE">[7]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12OutputConfigRNSt7ostreamE">[8]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStageRNSt7ostreamE">[9]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStatsRNSt7ostreamE">[10]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK13KvCacheConfigRNSt7ostreamE">[11]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14DecodingConfigRNSt7ostreamE">[12]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ExecutorConfigRNSt7ostreamE">[13]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStats">[14]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStatsRNSt7ostreamE">[15]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ParallelConfigRNSt7ostreamE">[16]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14SamplingConfigRNSt7ostreamE">[17]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15MultimodalInputRNSt7ostreamE">[18]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15PeftCacheConfigRNSt7ostreamE">[19]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15SchedulerConfigRNSt7ostreamE">[20]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK16AdditionalOutputRNSt7ostreamE">[21]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK17KVCacheStoredDataRNSt7ostreamE">[22]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK17SpecDecodingStatsRNSt7ostreamE">[23]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18ContextPhaseParamsRNSt7ostreamE">[24]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18DynamicBatchConfigRNSt7ostreamE">[25]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheCreatedDataRNSt7ostreamE">[26]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheRemovedDataRNSt7ostreamE">[27]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheUpdatedDataRNSt7ostreamE">[28]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18OrchestratorConfigRNSt7ostreamE">[29]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18PromptTuningConfigRNSt7ostreamE">[30]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18RequestPerfMetricsRNSt7ostreamE">[31]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK19StaticBatchingStatsRNSt7ostreamE">[32]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20DataTransceiverState">[33]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20DataTransceiverStateRNSt7ostreamE">[34]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingConfigRNSt7ostreamE">[35]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingParamsRNSt7ostreamE">[36]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK21AdditionalModelOutputRNSt7ostreamE">[37]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK21InflightBatchingStatsRNSt7ostreamE">[38]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22CacheTransceiverConfigRNSt7ostreamE">[39]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22DisServingRequestStatsRNSt7ostreamE">[40]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22KVCacheStoredBlockDataRNSt7ostreamE">[41]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22KvCacheRetentionConfigRNSt7ostreamE">[42]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK23LookaheadDecodingConfigRNSt7ostreamE">[43]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIteration">[44]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIterationRNSt7ostreamE">[45]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25ExternalDraftTokensConfigRNSt7ostreamE">[46]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25SpeculativeDecodingConfigRNSt7ostreamE">[47]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK29ExtendedRuntimePerfKnobConfigRNSt7ostreamE">[48]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK33SpeculativeDecodingFastLogitsInfoRNSt7ostreamE">[49]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6ResultRNSt7ostreamE">[50]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6TensorRNSt7ostreamE">[51]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK7RequestRNSt7ostreamE">[52]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK8ResponseRNSt7ostreamE">[53]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN12tensorrt_llm13batch_manager16kv_cache_manager8BlockKeyERNSt7ostreamE">[54]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN12tensorrt_llm7runtime11UniqueTokenERNSt7ostreamE">[55]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN18RequestPerfMetrics9TimePointERNSt7ostreamE">[56]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigERNSt7ostreamE">[57]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache10AgentStateERNSt7ostreamE">[58]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache10CacheStateERNSt7ostreamE">[59]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache11SocketStateERNSt7ostreamE">[60]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache9CommStateERNSt7ostreamE">[61]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt5dequeI12KVCacheEventEE">[62]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI14IterationStatsEE">[63]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI24RequestStatsPerIterationEE">[64]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI8ResponseEE">[65]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor13Serialization14serializedSizeE6size_tRK16KVCacheEventDiffI1TE">tensorrt_llm::executor::Serialization::serializedSize (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK10LoraConfig">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11DebugConfig">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11EagleConfig">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11MropeConfig">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12DecodingMode">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12KVCacheEvent">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12KvCacheStats">[7]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12OutputConfig">[8]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStage">[9]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStats">[10]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK13KvCacheConfig">[11]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14DecodingConfig">[12]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ExecutorConfig">[13]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14IterationStats">[14]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ParallelConfig">[15]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14SamplingConfig">[16]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15MultimodalInput">[17]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15PeftCacheConfig">[18]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15SchedulerConfig">[19]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK16AdditionalOutput">[20]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK17KVCacheStoredData">[21]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK17SpecDecodingStats">[22]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18ContextPhaseParams">[23]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18DynamicBatchConfig">[24]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheCreatedData">[25]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheRemovedData">[26]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheUpdatedData">[27]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18OrchestratorConfig">[28]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18PromptTuningConfig">[29]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18RequestPerfMetrics">[30]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK19StaticBatchingStats">[31]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20DataTransceiverState">[32]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingConfig">[33]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingParams">[34]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK21AdditionalModelOutput">[35]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK21InflightBatchingStats">[36]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22CacheTransceiverConfig">[37]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22DisServingRequestStats">[38]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22KVCacheStoredBlockData">[39]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22KvCacheRetentionConfig">[40]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK23LookaheadDecodingConfig">[41]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK24RequestStatsPerIteration">[42]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25ExternalDraftTokensConfig">[43]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25SpeculativeDecodingConfig">[44]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK29ExtendedRuntimePerfKnobConfig">[45]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK33SpeculativeDecodingFastLogitsInfo">[46]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Result">[47]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Tensor">[48]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK7Request">[49]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK8Response">[50]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN12tensorrt_llm13batch_manager16kv_cache_manager8BlockKeyE">[51]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN12tensorrt_llm7runtime11UniqueTokenE">[52]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN18RequestPerfMetrics9TimePointE">[53]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigE">[54]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache10AgentStateE">[55]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache10CacheStateE">[56]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache11SocketStateE">[57]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache9CommStateE">[58]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5ShapeE">tensorrt_llm::executor::Shape (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape4BaseE">tensorrt_llm::executor::Shape::Base (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape9DimType64E">tensorrt_llm::executor::Shape::DimType64 (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI9DimType64EE">tensorrt_llm::executor::Shape::Shape (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK9DimType64N4Base9size_typeE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10SizeType32E">tensorrt_llm::executor::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10SizeType64E">tensorrt_llm::executor::SizeType64 (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStatsE">tensorrt_llm::executor::SpecDecodingStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats16acceptanceLengthE">tensorrt_llm::executor::SpecDecodingStats::acceptanceLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats13draftOverheadE">tensorrt_llm::executor::SpecDecodingStats::draftOverhead (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats13iterLatencyMSE">tensorrt_llm::executor::SpecDecodingStats::iterLatencyMS (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats17numAcceptedTokensE">tensorrt_llm::executor::SpecDecodingStats::numAcceptedTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats14numDraftTokensE">tensorrt_llm::executor::SpecDecodingStats::numDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17SpecDecodingStats26numRequestsWithDraftTokensE">tensorrt_llm::executor::SpecDecodingStats::numRequestsWithDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE">tensorrt_llm::executor::SpeculativeDecodingConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig10fastLogitsE">tensorrt_llm::executor::SpeculativeDecodingConfig::fastLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfigeqERK25SpeculativeDecodingConfig">tensorrt_llm::executor::SpeculativeDecodingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigEb">tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfoE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo18draftParticipantIdE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::draftParticipantId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo14draftRequestIdE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::draftRequestId (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo8toTensorEv">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::toTensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE">tensorrt_llm::executor::StaticBatchingStats (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE">tensorrt_llm::executor::StaticBatchingStats::emptyGenSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE">tensorrt_llm::executor::StaticBatchingStats::numContextRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE">tensorrt_llm::executor::StaticBatchingStats::numCtxTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE">tensorrt_llm::executor::StaticBatchingStats::numGenTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::StaticBatchingStats::numScheduledRequests (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9StreamPtrE">tensorrt_llm::executor::StreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorE">tensorrt_llm::executor::Tensor (C++ class)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr">tensorrt_llm::executor::Tensor::copyTo (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToCpu (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToGpu (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToManaged (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPinned (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPooledPinned (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape">tensorrt_llm::executor::Tensor::cpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::detail::ofITensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor">tensorrt_llm::executor::Tensor::detail::toITensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv">tensorrt_llm::executor::Tensor::getData (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv">tensorrt_llm::executor::Tensor::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv">tensorrt_llm::executor::Tensor::getMemoryType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev">tensorrt_llm::executor::Tensor::getRuntimeType (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv">tensorrt_llm::executor::Tensor::getShape (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv">tensorrt_llm::executor::Tensor::getSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv">tensorrt_llm::executor::Tensor::getSizeInBytes (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape">tensorrt_llm::executor::Tensor::gpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor4ImplE">tensorrt_llm::executor::Tensor::Impl (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape">tensorrt_llm::executor::Tensor::managed (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE">tensorrt_llm::executor::Tensor::mTensor (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape">tensorrt_llm::executor::Tensor::of (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape">[2]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorcvbEv">tensorrt_llm::executor::Tensor::operator bool (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor">tensorrt_llm::executor::Tensor::operator!= (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor">tensorrt_llm::executor::Tensor::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor">tensorrt_llm::executor::Tensor::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pooledPinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape">[1]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr">tensorrt_llm::executor::Tensor::setFrom (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr">tensorrt_llm::executor::Tensor::setZero (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::Tensor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv">[3]</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorD0Ev">tensorrt_llm::executor::Tensor::~Tensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9TensorPtrE">tensorrt_llm::executor::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11TokenIdTypeE">tensorrt_llm::executor::TokenIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE">tensorrt_llm::executor::TypeTraits (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE">tensorrt_llm::executor::TypeTraits<bool> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE">tensorrt_llm::executor::TypeTraits<bool>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE">tensorrt_llm::executor::TypeTraits<float> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE">tensorrt_llm::executor::TypeTraits<float>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE">tensorrt_llm::executor::TypeTraits<half> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE">tensorrt_llm::executor::TypeTraits<half>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE">tensorrt_llm::executor::TypeTraits<std::int32_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE">tensorrt_llm::executor::TypeTraits<std::int32_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE">tensorrt_llm::executor::TypeTraits<std::int64_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE">tensorrt_llm::executor::TypeTraits<std::int64_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE">tensorrt_llm::executor::TypeTraits<std::int8_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE">tensorrt_llm::executor::TypeTraits<std::int8_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE">tensorrt_llm::executor::TypeTraits<std::uint8_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE">tensorrt_llm::executor::TypeTraits<std::uint8_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE">tensorrt_llm::executor::TypeTraits<T*> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE">tensorrt_llm::executor::TypeTraits<T*>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11VecLogProbsE">tensorrt_llm::executor::VecLogProbs (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16VecTokenExtraIdsE">tensorrt_llm::executor::VecTokenExtraIds (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9VecTokensE">tensorrt_llm::executor::VecTokens (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7versionEv">tensorrt_llm::executor::version (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm6layersE">tensorrt_llm::layers (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm3mpiE">tensorrt_llm::mpi (C++ type)</a>
</li>
<li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">tensorrt_llm::runtime (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[25]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[28]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[29]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[30]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[31]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[32]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[33]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[34]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[35]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[36]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffersE">tensorrt_llm::runtime::AllReduceBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers16AllReduceBuffersE10SizeType3210SizeType3210SizeType3210SizeType32RK13BufferManagerRK11WorldConfigKb">tensorrt_llm::runtime::AllReduceBuffers::AllReduceBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers18mAllReduceCommPtrsE">tensorrt_llm::runtime::AllReduceBuffers::mAllReduceCommPtrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers9mFlagPtrsE">tensorrt_llm::runtime::AllReduceBuffers::mFlagPtrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers17mIpcMemoryHandlesE">tensorrt_llm::runtime::AllReduceBuffers::mIpcMemoryHandles (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers9TensorPtrE">tensorrt_llm::runtime::AllReduceBuffers::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer">tensorrt_llm::runtime::bufferCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7IBuffer9SharedPtrE">tensorrt_llm::runtime::bufferCastOrNull (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7ITensor9SharedPtrE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7IBuffer9SharedPtrEEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7ITensor9SharedPtrEEE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7IBuffer14SharedConstPtrE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7ITensor14SharedConstPtrE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7IBuffer14SharedConstPtrEEE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7ITensor14SharedConstPtrEEE">[7]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE">tensorrt_llm::runtime::BufferDataType (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb">tensorrt_llm::runtime::BufferDataType::BufferDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv">tensorrt_llm::runtime::BufferDataType::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv">tensorrt_llm::runtime::BufferDataType::getSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType13getSizeInBitsEv">tensorrt_llm::runtime::BufferDataType::getSizeInBits (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv">tensorrt_llm::runtime::BufferDataType::isPointer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv">tensorrt_llm::runtime::BufferDataType::isUnsigned (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE">tensorrt_llm::runtime::BufferDataType::kTrtPointerType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE">tensorrt_llm::runtime::BufferDataType::mDataType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE">tensorrt_llm::runtime::BufferDataType::mPointer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE">tensorrt_llm::runtime::BufferDataType::mUnsigned (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv">tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerE">tensorrt_llm::runtime::BufferManager (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb">tensorrt_llm::runtime::BufferManager::BufferManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer">tensorrt_llm::runtime::BufferManager::copy (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer">[4]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType">tensorrt_llm::runtime::BufferManager::copyFrom (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType">[4]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::cpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager14CudaMemPoolPtrE">tensorrt_llm::runtime::BufferManager::CudaMemPoolPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE">tensorrt_llm::runtime::BufferManager::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyBuffer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyTensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv">tensorrt_llm::runtime::BufferManager::getStream (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpuSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE">tensorrt_llm::runtime::BufferManager::IBufferPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7ipcNvlsENSt3setIiEEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::ipcNvls (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE">tensorrt_llm::runtime::BufferManager::ITensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE">tensorrt_llm::runtime::BufferManager::kBYTE_TYPE (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::managed (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv">tensorrt_llm::runtime::BufferManager::memoryPoolFree (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv">tensorrt_llm::runtime::BufferManager::memoryPoolReserved (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE">tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv">tensorrt_llm::runtime::BufferManager::memoryPoolUsed (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager5mPoolE">tensorrt_llm::runtime::BufferManager::mPool (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE">tensorrt_llm::runtime::BufferManager::mStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE">tensorrt_llm::runtime::BufferManager::mTrimPool (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinned (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinnedPool (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t">tensorrt_llm::runtime::BufferManager::setMem (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer">tensorrt_llm::runtime::BufferManager::setZero (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev">tensorrt_llm::runtime::BufferManager::~BufferManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE">tensorrt_llm::runtime::BufferRange (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE">tensorrt_llm::runtime::BufferRange::Base (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI1UEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeERK7IBuffer">tensorrt_llm::runtime::BufferRange::BufferRange (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tIXntNSt10is_const_vI1UEEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15CacheSaltIDTypeE">tensorrt_llm::runtime::CacheSaltIDType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13canAccessPeerERK11WorldConfig">tensorrt_llm::runtime::canAccessPeer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime27clearVirtualMemoryAllocatorEv">tensorrt_llm::runtime::clearVirtualMemoryAllocator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE">tensorrt_llm::runtime::constPointerCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEventE">tensorrt_llm::runtime::CudaEvent (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb">tensorrt_llm::runtime::CudaEvent::CudaEvent (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE">tensorrt_llm::runtime::CudaEvent::Deleter (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb">tensorrt_llm::runtime::CudaEvent::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE">tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer">tensorrt_llm::runtime::CudaEvent::Deleter::operator() (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE">tensorrt_llm::runtime::CudaEvent::element_type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE">tensorrt_llm::runtime::CudaEvent::EventPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv">tensorrt_llm::runtime::CudaEvent::get (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE">tensorrt_llm::runtime::CudaEvent::mEvent (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE">tensorrt_llm::runtime::CudaEvent::pointer (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv">tensorrt_llm::runtime::CudaEvent::synchronize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStreamE">tensorrt_llm::runtime::CudaStream (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t">tensorrt_llm::runtime::CudaStream::CudaStream (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE">tensorrt_llm::runtime::CudaStream::Deleter (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb">tensorrt_llm::runtime::CudaStream::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE">tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t">tensorrt_llm::runtime::CudaStream::Deleter::operator() (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv">tensorrt_llm::runtime::CudaStream::get (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv">tensorrt_llm::runtime::CudaStream::getDevice (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE">tensorrt_llm::runtime::CudaStream::mDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE">tensorrt_llm::runtime::CudaStream::mStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::record (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE">tensorrt_llm::runtime::CudaStream::StreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv">tensorrt_llm::runtime::CudaStream::synchronize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::wait (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocatorE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator8allocateEP7PointerNSt6size_tEi">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::allocate (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13ConfigurationE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration23backgroundConfigurationE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::backgroundConfiguration (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration13ConfigurationER24CudaVirtualMemoryManagerNSt6stringE11RestoreMode13CudaStreamPtr">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::Configuration (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration13ConfigurationER24CudaVirtualMemoryManagerNSt6stringE11RestoreMode13CudaStreamPtrb">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11mBackgroundE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mBackground (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11mBackStreamE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mBackStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration8mManagerE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration5mModeE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration9mPageSizeE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mPageSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration4mTagE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::mTag (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11pageAlignedENSt6size_tE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::pageAligned (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration25setVirtualMemoryAllocatorERKNSt6stringE11RestoreModeNSt10shared_ptrI10CudaStreamEE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Configuration::setVirtualMemoryAllocator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13CudaStreamPtrE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator26CudaVirtualMemoryAllocatorENSt10shared_ptrI13ConfigurationEE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::CudaVirtualMemoryAllocator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator10deallocateE7PointerNSt6size_tE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::deallocate (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator7mConfigE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::mConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocatorcvbEv">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::operator bool (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator7PointerE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::Pointer (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreModeE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::RestoreMode (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode3CPUE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::RestoreMode::CPU (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode6MEMSETE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::RestoreMode::MEMSET (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode4NONEE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::RestoreMode::NONE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode6PINNEDE">tensorrt_llm::runtime::CudaVirtualMemoryAllocator::RestoreMode::PINNED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk8_releaseEb">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::_release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratorE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorERK12Configurator">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator::Configurator (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorERR12Configurator">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorEv">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratoraSERK12Configurator">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator::operator= (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratoraSERR12Configurator">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator5setupE28CUmemGenericAllocationHandle">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator8teardownE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator::teardown (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratorD0Ev">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurator::~Configurator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk15ConfiguratorPtrE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::ConfiguratorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk13ConfiguratorsE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Configurators (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatorE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator6createEv">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator::create (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorERK7Creator">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator::Creator (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorERR7Creator">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorEv">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatoraSERK7Creator">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator::operator= (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatoraSERR7Creator">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7releaseE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator::release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatorD0Ev">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Creator::~Creator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk10CreatorPtrE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::CreatorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERK22CUDAVirtualMemoryChunk">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::CUDAVirtualMemoryChunk (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERR10CreatorPtrRR13Configurators">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERR22CUDAVirtualMemoryChunk">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkEv">[3]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk13INVALID_STATEE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::INVALID_STATE (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk11materializeEv">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::materialize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk14mConfiguratorsE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::mConfigurators (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk8mCreatorE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::mCreator (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7mHandleE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::mHandle (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6mStateE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::mState (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime22CUDAVirtualMemoryChunkcvbEv">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::operator bool (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkaSERK22CUDAVirtualMemoryChunk">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::operator= (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkaSERR22CUDAVirtualMemoryChunk">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7releaseEv">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6StatusE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Status (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6statusEv">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::status (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status7ERROREDE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Status::ERRORED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status7INVALIDE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Status::INVALID (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status12MATERIALIZEDE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Status::MATERIALIZED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status8RELEASEDE">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::Status::RELEASED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkD0Ev">tensorrt_llm::runtime::CUDAVirtualMemoryChunk::~CUDAVirtualMemoryChunk (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManagerE">tensorrt_llm::runtime::CudaVirtualMemoryManager (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IDpEN12tensorrt_llm7runtime24CudaVirtualMemoryManager3addEv9uintptr_tNSt6stringERRN22CUDAVirtualMemoryChunk10CreatorPtrEDpRR13Configurators">tensorrt_llm::runtime::CudaVirtualMemoryManager::add (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager3addE9uintptr_tNSt6stringERR22CUDAVirtualMemoryChunk">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager3addE9uintptr_tNSt6stringERRN22CUDAVirtualMemoryChunk10CreatorPtrERRN22CUDAVirtualMemoryChunk13ConfiguratorsE">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager12addBadHandleE9uintptr_t">tensorrt_llm::runtime::CudaVirtualMemoryManager::addBadHandle (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5EntryE">tensorrt_llm::runtime::CudaVirtualMemoryManager::Entry (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5Entry8mEntryItE">tensorrt_llm::runtime::CudaVirtualMemoryManager::Entry::mEntryIt (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5Entry7mMemoryE">tensorrt_llm::runtime::CudaVirtualMemoryManager::Entry::mMemory (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager18materializeWithTagERKNSt6stringE">tensorrt_llm::runtime::CudaVirtualMemoryManager::materializeWithTag (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager11mBadHandlesE">tensorrt_llm::runtime::CudaVirtualMemoryManager::mBadHandles (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager8mEntriesE">tensorrt_llm::runtime::CudaVirtualMemoryManager::mEntries (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager9mMemoriesE">tensorrt_llm::runtime::CudaVirtualMemoryManager::mMemories (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager6mMutexE">tensorrt_llm::runtime::CudaVirtualMemoryManager::mMutex (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager16PointerMemoryMapE">tensorrt_llm::runtime::CudaVirtualMemoryManager::PointerMemoryMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager14releaseWithTagERKNSt6stringE">tensorrt_llm::runtime::CudaVirtualMemoryManager::releaseWithTag (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager6removeE9uintptr_t">tensorrt_llm::runtime::CudaVirtualMemoryManager::remove (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager18retrieveBadHandlesEv">tensorrt_llm::runtime::CudaVirtualMemoryManager::retrieveBadHandles (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager11TagEntryMapE">tensorrt_llm::runtime::CudaVirtualMemoryManager::TagEntryMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager12unsafeRemoveE9uintptr_t">tensorrt_llm::runtime::CudaVirtualMemoryManager::unsafeRemove (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE">tensorrt_llm::runtime::DataTypeTraits (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE">tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::name (C++ member)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::name (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::type (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoderE">tensorrt_llm::runtime::decoder (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoderE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffersE">tensorrt_llm::runtime::decoder::BeamSearchBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers17BeamSearchBuffersERK13BufferManager">tensorrt_llm::runtime::decoder::BeamSearchBuffers::BeamSearchBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers15mCumLogProbsTmpE">tensorrt_llm::runtime::decoder::BeamSearchBuffers::mCumLogProbsTmp (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers7mNumSMsE">tensorrt_llm::runtime::decoder::BeamSearchBuffers::mNumSMs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers21mOutputBeamHypothesesE">tensorrt_llm::runtime::decoder::BeamSearchBuffers::mOutputBeamHypotheses (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers7reshapeE10SizeType3210SizeType32">tensorrt_llm::runtime::decoder::BeamSearchBuffers::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderStateE">tensorrt_llm::runtime::decoder::DecoderState (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12DecoderStateEv">tensorrt_llm::runtime::decoder::DecoderState::DecoderState (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16DecodingInputPtrE">tensorrt_llm::runtime::decoder::DecoderState::DecodingInputPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState17DecodingOutputPtrE">tensorrt_llm::runtime::decoder::DecoderState::DecodingOutputPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16disableLookaheadERK13RequestVector">tensorrt_llm::runtime::decoder::DecoderState::disableLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState24getAcceptedLengthsCumSumEv">tensorrt_llm::runtime::decoder::DecoderState::getAcceptedLengthsCumSum (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState22getAcceptedPackedPathsEv">tensorrt_llm::runtime::decoder::DecoderState::getAcceptedPackedPaths (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getAllNewTokensEv">tensorrt_llm::runtime::decoder::DecoderState::getAllNewTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState20getBeamSearchBuffersEv">tensorrt_llm::runtime::decoder::DecoderState::getBeamSearchBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState24getCacheIndirectionInputEv">tensorrt_llm::runtime::decoder::DecoderState::getCacheIndirectionInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getCacheIndirectionOutputEv">tensorrt_llm::runtime::decoder::DecoderState::getCacheIndirectionOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getCumLogProbsE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getCumLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getCumLogProbsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getEagleBuffersEv">tensorrt_llm::runtime::decoder::DecoderState::getEagleBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState29getExplicitDraftTokensBuffersEv">tensorrt_llm::runtime::decoder::DecoderState::getExplicitDraftTokensBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getFinishedSumEv">tensorrt_llm::runtime::decoder::DecoderState::getFinishedSum (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState16getFinishReasonsEv">tensorrt_llm::runtime::decoder::DecoderState::getFinishReasons (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getGatheredIdsE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getGatheredIds (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getGatheredIdsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getGenerationStepsEv">tensorrt_llm::runtime::decoder::DecoderState::getGenerationSteps (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState6getIdsE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getIds (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState6getIdsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState21getJointDecodingInputEv">tensorrt_llm::runtime::decoder::DecoderState::getJointDecodingInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState22getJointDecodingOutputEv">tensorrt_llm::runtime::decoder::DecoderState::getJointDecodingOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState11getLogProbsE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState11getLogProbsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState19getLookaheadBuffersEv">tensorrt_llm::runtime::decoder::DecoderState::getLookaheadBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getMaxBeamWidthEv">tensorrt_llm::runtime::decoder::DecoderState::getMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState27getMaxDecodingDecoderTokensEv">tensorrt_llm::runtime::decoder::DecoderState::getMaxDecodingDecoderTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getMaxDecodingEngineTokensEv">tensorrt_llm::runtime::decoder::DecoderState::getMaxDecodingEngineTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getMaxNumSequencesEv">tensorrt_llm::runtime::decoder::DecoderState::getMaxNumSequences (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState20getMaxSequenceLengthEv">tensorrt_llm::runtime::decoder::DecoderState::getMaxSequenceLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getNextDraftTokensEv">tensorrt_llm::runtime::decoder::DecoderState::getNextDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getNextDraftTokensLengthsEv">tensorrt_llm::runtime::decoder::DecoderState::getNextDraftTokensLengths (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getNumDecodingEngineTokensE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getNumDecodingEngineTokens (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getNumDecodingEngineTokensEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState12getParentIdsEv">tensorrt_llm::runtime::decoder::DecoderState::getParentIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getPrevDraftTokensLengthsEv">tensorrt_llm::runtime::decoder::DecoderState::getPrevDraftTokensLengths (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getSequenceLengthsE10SizeType32">tensorrt_llm::runtime::decoder::DecoderState::getSequenceLengths (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getSequenceLengthsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getSpeculativeDecodingModeEv">tensorrt_llm::runtime::decoder::DecoderState::getSpeculativeDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13LlmRequestPtrE">tensorrt_llm::runtime::decoder::DecoderState::LlmRequestPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18mBeamSearchBuffersE">tensorrt_llm::runtime::decoder::DecoderState::mBeamSearchBuffers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState19mJointDecodingInputE">tensorrt_llm::runtime::decoder::DecoderState::mJointDecodingInput (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState20mJointDecodingOutputE">tensorrt_llm::runtime::decoder::DecoderState::mJointDecodingOutput (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13mMaxBeamWidthE">tensorrt_llm::runtime::decoder::DecoderState::mMaxBeamWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState25mMaxDecodingDecoderTokensE">tensorrt_llm::runtime::decoder::DecoderState::mMaxDecodingDecoderTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mMaxDecodingEngineTokensE">tensorrt_llm::runtime::decoder::DecoderState::mMaxDecodingEngineTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16mMaxNumSequencesE">tensorrt_llm::runtime::decoder::DecoderState::mMaxNumSequences (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18mMaxSequenceLengthE">tensorrt_llm::runtime::decoder::DecoderState::mMaxSequenceLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mNumDecodingEngineTokensE">tensorrt_llm::runtime::decoder::DecoderState::mNumDecodingEngineTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mSpeculativeDecodingModeE">tensorrt_llm::runtime::decoder::DecoderState::mSpeculativeDecodingMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13RequestVectorE">tensorrt_llm::runtime::decoder::DecoderState::RequestVector (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState14reshapeBuffersE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::reshapeBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState30reshapeCacheIndirectionBuffersE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::decoder::DecoderState::reshapeCacheIndirectionBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState33reshapeSpeculativeDecodingBuffersERK23SpeculativeDecodingMode10SizeType32RK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::reshapeSpeculativeDecodingBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12setBeamWidthE10SizeType3210SizeType32">tensorrt_llm::runtime::decoder::DecoderState::setBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18setGenerationStepsERKNSt6vectorI10SizeType32EE">tensorrt_llm::runtime::decoder::DecoderState::setGenerationSteps (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState26setNumDecodingEngineTokensE10SizeType3210SizeType32">tensorrt_llm::runtime::decoder::DecoderState::setNumDecodingEngineTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState5setupE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12setupBuffersEN8nvinfer18DataTypeERK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setupBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState21setupCacheIndirectionE10SizeType3210SizeType3210SizeType32RK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setupCacheIndirection (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState28setupCacheIndirectionBuffersERK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setupCacheIndirectionBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24setupSpeculativeDecodingERK23SpeculativeDecodingMode10SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setupSpeculativeDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState31setupSpeculativeDecodingBuffersE23SpeculativeDecodingModeN8nvinfer18DataTypeERK13BufferManager">tensorrt_llm::runtime::decoder::DecoderState::setupSpeculativeDecodingBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState9TensorPtrE">tensorrt_llm::runtime::decoder::DecoderState::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batchE">tensorrt_llm::runtime::decoder_batch (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE">tensorrt_llm::runtime::decoder_batch::Input (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input10batchSlotsE">tensorrt_llm::runtime::decoder_batch::Input::batchSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE">tensorrt_llm::runtime::decoder_batch::Input::Input (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorINSt6vectorI14TensorConstPtrEEEE10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE">tensorrt_llm::runtime::decoder_batch::Input::logits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input15maxDecoderStepsE">tensorrt_llm::runtime::decoder_batch::Input::maxDecoderSteps (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInputE">tensorrt_llm::runtime::DecodingInput (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE">tensorrt_llm::runtime::DecodingInput::badWordsLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13badWordsListsE">tensorrt_llm::runtime::DecodingInput::badWordsLists (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE">tensorrt_llm::runtime::DecodingInput::badWordsPtrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE">tensorrt_llm::runtime::DecodingInput::batchSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE">tensorrt_llm::runtime::DecodingInput::batchSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput10beamWidthsE">tensorrt_llm::runtime::DecodingInput::beamWidths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE">tensorrt_llm::runtime::DecodingInput::cacheIndirection (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputEv">tensorrt_llm::runtime::DecodingInput::DecodingInput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11eagleInputsE">tensorrt_llm::runtime::DecodingInput::eagleInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputsE">tensorrt_llm::runtime::DecodingInput::EagleInputs (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs12acceptedLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15acceptedPathIdsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedPathIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14acceptedTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs24chunkedContextNextTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::chunkedContextNextTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13lastDraftLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14lastDraftPathsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15lastDraftTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13nextDraftLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14nextDraftPathsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs8seqSlotsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::seqSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE">tensorrt_llm::runtime::DecodingInput::embeddingBias (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE">tensorrt_llm::runtime::DecodingInput::endIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25explicitDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::explicitDraftTokensInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::bestPathIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::bestPathLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16lastDraftIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastDraftIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15lastDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21lastGenerationLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs19lastPositionIdsBaseE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastPositionIdsBase (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs5masksE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::masks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs18maxGenLengthDeviceE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::maxGenLengthDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16nextDraftIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextDraftProbsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextFlatTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextFlatTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21nextGenerationLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs17packedPositionIdsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::packedPositionIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs8seqSlotsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::seqSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25externalDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::externalDraftTokensInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs17constantThresholdE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::constantThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11draftLogitsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs15draftLogitsHostE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftLogitsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs10draftProbsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs13draftTokenIdsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftTokenIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs17draftTokenIdsHostE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftTokenIdsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14numDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::numDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs18numDraftTokensHostE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::numDraftTokensHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs4stepE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::step (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11targetProbsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::targetProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14useDraftLogitsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useDraftLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs18useDraftLogitsHostE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useDraftLogitsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs28useRandomAcceptanceThresholdE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useRandomAcceptanceThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13finishReasonsE">tensorrt_llm::runtime::DecodingInput::finishReasons (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15generationStepsE">tensorrt_llm::runtime::DecodingInput::generationSteps (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE">tensorrt_llm::runtime::DecodingInput::lengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9logitsVecE">tensorrt_llm::runtime::DecodingInput::logitsVec (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15lookaheadInputsE">tensorrt_llm::runtime::DecodingInput::lookaheadInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputsE">tensorrt_llm::runtime::DecodingInput::LookaheadInputs (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputs13tokensPerStepE">tensorrt_llm::runtime::DecodingInput::LookaheadInputs::tokensPerStep (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE">tensorrt_llm::runtime::DecodingInput::maxAttentionWindow (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE">tensorrt_llm::runtime::DecodingInput::maxBadWordsLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE">tensorrt_llm::runtime::DecodingInput::maxLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE">tensorrt_llm::runtime::DecodingInput::maxStopWordsLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE">tensorrt_llm::runtime::DecodingInput::medusaInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaCurTokensPerStep (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTargetTokensPerStep (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTreeIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE">tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE">tensorrt_llm::runtime::DecodingInput::sequenceLimitLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE">tensorrt_llm::runtime::DecodingInput::sinkTokenLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE">tensorrt_llm::runtime::DecodingInput::step (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE">tensorrt_llm::runtime::DecodingInput::stopWordsLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14stopWordsListsE">tensorrt_llm::runtime::DecodingInput::stopWordsLists (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE">tensorrt_llm::runtime::DecodingInput::stopWordsPtrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14TensorConstPtrE">tensorrt_llm::runtime::DecodingInput::TensorConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE">tensorrt_llm::runtime::DecodingInput::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutputE">tensorrt_llm::runtime::DecodingOutput (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE">tensorrt_llm::runtime::DecodingOutput::beamHypotheses (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses10batchDonesE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::batchDones (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses14cumLogProbsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbsCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyERK13BufferManager">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initERK13BufferManager11TokenIdType">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11logProbsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbsCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18minNormedScoresCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScoresCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15normedScoresCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScoresCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11numBeamsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeamsCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsCBA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE10SizeType3210SizeType32">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE">tensorrt_llm::runtime::DecodingOutput::cacheIndirection (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE">tensorrt_llm::runtime::DecodingOutput::cumLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputEv">tensorrt_llm::runtime::DecodingOutput::DecodingOutput (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput12eagleBuffersE">tensorrt_llm::runtime::DecodingOutput::eagleBuffers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26explicitDraftTokensBuffersE">tensorrt_llm::runtime::DecodingOutput::explicitDraftTokensBuffers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE">tensorrt_llm::runtime::DecodingOutput::finishedSum (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13finishReasonsE">tensorrt_llm::runtime::DecodingOutput::finishReasons (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11gatheredIdsE">tensorrt_llm::runtime::DecodingOutput::gatheredIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE">tensorrt_llm::runtime::DecodingOutput::ids (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE">tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE">tensorrt_llm::runtime::DecodingOutput::lengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE">tensorrt_llm::runtime::DecodingOutput::logProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13logProbsTiledE">tensorrt_llm::runtime::DecodingOutput::logProbsTiled (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput16lookaheadOutputsE">tensorrt_llm::runtime::DecodingOutput::lookaheadOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE">tensorrt_llm::runtime::DecodingOutput::newTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE">tensorrt_llm::runtime::DecodingOutput::newTokensSteps (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE">tensorrt_llm::runtime::DecodingOutput::newTokensVec (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE">tensorrt_llm::runtime::DecodingOutput::parentIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputsE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26speculativeDecodingOutputsE">tensorrt_llm::runtime::DecodingOutput::speculativeDecodingOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs21acceptedLengthsCumSumE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::acceptedLengthsCumSum (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs17acceptedTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::acceptedTokensLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::nextDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18nextDraftTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::nextDraftTokensLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs12pathsOffsetsE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::pathsOffsets (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18prevDraftTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::prevDraftTokensLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE">tensorrt_llm::runtime::DecodingOutput::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime20DeviceAllocationNvlsE">tensorrt_llm::runtime::DeviceAllocationNvls (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls9_capacityE">tensorrt_llm::runtime::DeviceAllocationNvls::_capacity (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls7_handleE">tensorrt_llm::runtime::DeviceAllocationNvls::_handle (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls20DeviceAllocationNvlsEv">tensorrt_llm::runtime::DeviceAllocationNvls::DeviceAllocationNvls (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls4freeEv">tensorrt_llm::runtime::DeviceAllocationNvls::free (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls11getCapacityEv">tensorrt_llm::runtime::DeviceAllocationNvls::getCapacity (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls21getIpcUnicastPointersEv">tensorrt_llm::runtime::DeviceAllocationNvls::getIpcUnicastPointers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls19getMulticastPointerEv">tensorrt_llm::runtime::DeviceAllocationNvls::getMulticastPointer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls17getUnicastPointerEv">tensorrt_llm::runtime::DeviceAllocationNvls::getUnicastPointer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls5resetE6size_tNSt3setIiEE">tensorrt_llm::runtime::DeviceAllocationNvls::reset (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvlsD0Ev">tensorrt_llm::runtime::DeviceAllocationNvls::~DeviceAllocationNvls (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffersE">tensorrt_llm::runtime::EagleBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9BufferPtrE">tensorrt_llm::runtime::EagleBuffers::BufferPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers28chunkedContextNextTokensHostE">tensorrt_llm::runtime::EagleBuffers::chunkedContextNextTokensHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers23cumSumGenerationLengthsE">tensorrt_llm::runtime::EagleBuffers::cumSumGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers12EagleBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN8executor14DecodingConfigE">tensorrt_llm::runtime::EagleBuffers::EagleBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers12engineInputsE">tensorrt_llm::runtime::EagleBuffers::engineInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13engineOutputsE">tensorrt_llm::runtime::EagleBuffers::engineOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs12acceptedLensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13acceptedPathsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14acceptedTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs24chunkedContextNextTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::chunkedContextNextTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13nextDraftLensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14nextDraftPathsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs15nextDraftTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers18greedySamplingHostE">tensorrt_llm::runtime::EagleBuffers::greedySamplingHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6InputsE">tensorrt_llm::runtime::EagleBuffers::Inputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs22allLayersDraftTokenIdsE">tensorrt_llm::runtime::EagleBuffers::Inputs::allLayersDraftTokenIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs33allLayersDraftTokenIdsPredecessorE">tensorrt_llm::runtime::EagleBuffers::Inputs::allLayersDraftTokenIdsPredecessor (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs15allLayersScoresE">tensorrt_llm::runtime::EagleBuffers::Inputs::allLayersScores (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs24chunkedContextNextTokensE">tensorrt_llm::runtime::EagleBuffers::Inputs::chunkedContextNextTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs6createE10SizeType32RK13BufferManagerRK11ModelConfigRK11WorldConfig">tensorrt_llm::runtime::EagleBuffers::Inputs::create (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs20currentExpandIndicesE">tensorrt_llm::runtime::EagleBuffers::Inputs::currentExpandIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs9draftLensE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftLens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs10draftPathsE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs14draftPathsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftPathsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs11draftTokensE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs22dynamicTreeMaxTopKHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::dynamicTreeMaxTopKHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetCtxContextLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxContextLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetCtxPastKeyValueLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxPastKeyValueLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetCtxRequestTypesHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxRequestTypesHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetGenContextLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenContextLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetGenPastKeyValueLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenPastKeyValueLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetGenRequestTypesHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenRequestTypesHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18inputGenTokensHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::inputGenTokensHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs14posteriorAlphaE">tensorrt_llm::runtime::EagleBuffers::Inputs::posteriorAlpha (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18posteriorThresholdE">tensorrt_llm::runtime::EagleBuffers::Inputs::posteriorThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs10prevScoresE">tensorrt_llm::runtime::EagleBuffers::Inputs::prevScores (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs16randomDataSampleE">tensorrt_llm::runtime::EagleBuffers::Inputs::randomDataSample (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs20randomDataValidationE">tensorrt_llm::runtime::EagleBuffers::Inputs::randomDataValidation (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29specDecodingGenerationLengthsE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs33specDecodingGenerationLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingGenerationLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs23specDecodingPackedMasksE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingPackedMasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27specDecodingPositionOffsetsE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingPositionOffsets (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs12temperaturesE">tensorrt_llm::runtime::EagleBuffers::Inputs::temperatures (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18useDynamicTreeHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::useDynamicTreeHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs15useSpecDecodingE">tensorrt_llm::runtime::EagleBuffers::Inputs::useSpecDecoding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12EagleBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE">tensorrt_llm::runtime::EagleBuffers::insertInputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers7ITensorE">tensorrt_llm::runtime::EagleBuffers::ITensor (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13LlmRequestPtrE">tensorrt_llm::runtime::EagleBuffers::LlmRequestPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers19maxGenerationLengthE">tensorrt_llm::runtime::EagleBuffers::maxGenerationLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers26mDefaultPosteriorThresholdE">tensorrt_llm::runtime::EagleBuffers::mDefaultPosteriorThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers17mDoGreedySamplingE">tensorrt_llm::runtime::EagleBuffers::mDoGreedySampling (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers18posteriorAlphaHostE">tensorrt_llm::runtime::EagleBuffers::posteriorAlphaHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers22posteriorThresholdHostE">tensorrt_llm::runtime::EagleBuffers::posteriorThresholdHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13RequestVectorE">tensorrt_llm::runtime::EagleBuffers::RequestVector (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE">tensorrt_llm::runtime::EagleBuffers::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers21scanReduceTempStorageE">tensorrt_llm::runtime::EagleBuffers::scanReduceTempStorage (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers26scanReduceTempStorageBytesE">tensorrt_llm::runtime::EagleBuffers::scanReduceTempStorageBytes (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime12EagleBuffers13setFromInputsEvRK13RequestVectorRK13RequestVector10SizeType32RK7ITensorRKN12EagleBuffers6InputsERKN7runtime11EagleModuleERKN7runtime13BufferManagerE">tensorrt_llm::runtime::EagleBuffers::setFromInputs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12EagleBuffers13setFromInputsERK13RequestVectorRK13RequestVectorRKN7runtime7ITensorERK7ITensorRKN12EagleBuffers6InputsERKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers10SizeType32E">tensorrt_llm::runtime::EagleBuffers::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorMapE">tensorrt_llm::runtime::EagleBuffers::TensorMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorPtrE">tensorrt_llm::runtime::EagleBuffers::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModuleE">tensorrt_llm::runtime::EagleModule (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModule11EagleModuleE10SizeType3210SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::EagleModule::EagleModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModule11EagleModuleEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11EagleModule22getDefaultEagleChoicesEv">tensorrt_llm::runtime::EagleModule::getDefaultEagleChoices (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11EagleModule26getMaxNonLeafNodesPerLayerEv">tensorrt_llm::runtime::EagleModule::getMaxNonLeafNodesPerLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11EagleModule23getNumTransformerLayersEv">tensorrt_llm::runtime::EagleModule::getNumTransformerLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModule20mDefaultEagleChoicesE">tensorrt_llm::runtime::EagleModule::mDefaultEagleChoices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModule24mMaxNonLeafNodesPerLayerE">tensorrt_llm::runtime::EagleModule::mMaxNonLeafNodesPerLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11EagleModule21mNumTransformersLayerE">tensorrt_llm::runtime::EagleModule::mNumTransformersLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffersE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9BufferPtrE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::BufferPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers23cumSumGenerationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::cumSumGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12engineInputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::engineInputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs15positionOffsetsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs::positionOffsets (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs18requestTypesDeviceE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs::requestTypesDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13engineOutputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::engineOutputs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::bestPathIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::bestPathLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs5masksE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::masks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs11maxGenTokenE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::maxGenToken (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs16nextDraftIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextDraftProbsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15nextDraftTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextFlatTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextFlatTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs21nextGenerationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextGenerationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs19nextPositionOffsetsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextPositionOffsets (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs17packedPositionIdsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::packedPositionIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs13totalGenTokenE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::totalGenToken (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers26ExplicitDraftTokensBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::ExplicitDraftTokensBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6InputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs6createE10SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::create (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12draftIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftIndices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs10draftProbsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11draftTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs17generationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::generationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs21generationLengthsHostE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::generationLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16maxGenLengthHostE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::maxGenLengthHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11packedMasksE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::packedMasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11positionIdsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::positionIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs15positionIdsBaseE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::positionIdsBase (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16randomDataSampleE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::randomDataSample (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs20randomDataValidationE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::randomDataValidation (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12temperaturesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::temperatures (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs15useSpecDecodingE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::useSpecDecoding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::insertInputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7ITensorE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::ITensor (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers15scanTempStorageE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::scanTempStorage (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers20scanTempStorageBytesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::scanTempStorageBytes (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsEv10SizeType3210SizeType3210SizeType32RK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime25ExplicitDraftTokensModuleERKN7runtime10CudaStreamE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::setFromInputs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsE10SizeType3210SizeType32RKN7runtime7ITensorERK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN7runtime13BufferManagerERKN7runtime10CudaStreamE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers10SizeType32E">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorMapE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::TensorMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorPtrE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE">tensorrt_llm::runtime::GenericPromptTuningParams (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE">tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE">tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams10SizeType32E">tensorrt_llm::runtime::GenericPromptTuningParams::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE">tensorrt_llm::runtime::GenericPromptTuningParams::tasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE">tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE">tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20getDefaultBatchSlotsEN7runtime10SizeType32E">tensorrt_llm::runtime::getDefaultBatchSlots (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25getVirtualMemoryAllocatorEv">tensorrt_llm::runtime::getVirtualMemoryAllocator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23getVirtualMemoryManagerEv">tensorrt_llm::runtime::getVirtualMemoryManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE">tensorrt_llm::runtime::GptDecoder (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoder::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder16disableLookaheadERKNSt8optionalI14SamplingConfigEE10SizeType3214TensorConstPtr">tensorrt_llm::runtime::GptDecoder::disableLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forwardAsync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forwardSync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::GptDecoder::getSamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERKN8executor12DecodingModeE6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt10shared_ptrIK25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::GptDecoder::GptDecoder (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder23mDecodingLayerWorkspaceE">tensorrt_llm::runtime::GptDecoder::mDecodingLayerWorkspace (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13mDecodingModeE">tensorrt_llm::runtime::GptDecoder::mDecodingMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE">tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE">tensorrt_llm::runtime::GptDecoder::mManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder16mMaxNumSequencesE">tensorrt_llm::runtime::GptDecoder::mMaxNumSequences (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE">tensorrt_llm::runtime::GptDecoder::mSamplingConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder10mVocabSizeE">tensorrt_llm::runtime::GptDecoder::mVocabSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder16mVocabSizePaddedE">tensorrt_llm::runtime::GptDecoder::mVocabSizePadded (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEENSt8optionalIN8nvinfer18DataTypeEEERKNSt8optionalINSt6vectorI14TensorConstPtrEEEERKNSt8optionalINSt6vectorIN8executor23LookaheadDecodingConfigEEEEE">tensorrt_llm::runtime::GptDecoder::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE">tensorrt_llm::runtime::GptDecoder::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatchedE">tensorrt_llm::runtime::GptDecoderBatched (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoderBatched::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16disableLookaheadERK13RequestVectorRK9TensorPtr">tensorrt_llm::runtime::GptDecoderBatched::disableLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched8finalizeERKN7decoder12DecoderStateE10SizeType32RK14SamplingConfigb">tensorrt_llm::runtime::GptDecoderBatched::finalize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched7forwardERKN7decoder12DecoderStateERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::forward (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched12forwardAsyncERKN7decoder12DecoderStateERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::forwardAsync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched15forwardDispatchERKN7decoder12DecoderStateERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::forwardDispatch (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched16getBufferManagerEv">tensorrt_llm::runtime::GptDecoderBatched::getBufferManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched16getDecoderStreamEv">tensorrt_llm::runtime::GptDecoderBatched::getDecoderStream (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched20getUnderlyingDecoderEv">tensorrt_llm::runtime::GptDecoderBatched::getUnderlyingDecoder (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched17GptDecoderBatchedE13CudaStreamPtr">tensorrt_llm::runtime::GptDecoderBatched::GptDecoderBatched (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13GptDecoderPtrE">tensorrt_llm::runtime::GptDecoderBatched::GptDecoderPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13LlmRequestPtrE">tensorrt_llm::runtime::GptDecoderBatched::LlmRequestPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mBufferManagerE">tensorrt_llm::runtime::GptDecoderBatched::mBufferManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched8mDecoderE">tensorrt_llm::runtime::GptDecoderBatched::mDecoder (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mDecoderStreamE">tensorrt_llm::runtime::GptDecoderBatched::mDecoderStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mRuntimeStreamE">tensorrt_llm::runtime::GptDecoderBatched::mRuntimeStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13RequestVectorE">tensorrt_llm::runtime::GptDecoderBatched::RequestVector (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched5setupERKN8executor12DecodingModeE10SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfig">tensorrt_llm::runtime::GptDecoderBatched::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched9TensorPtrE">tensorrt_llm::runtime::GptDecoderBatched::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE">tensorrt_llm::runtime::GptJsonConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig">tensorrt_llm::runtime::GptJsonConfig::engineFilename (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig21getContextParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getContextParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getGpusPerNodeEv">tensorrt_llm::runtime::GptJsonConfig::getGpusPerNode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv">tensorrt_llm::runtime::GptJsonConfig::getModelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig21getModelConfigMutableEv">tensorrt_llm::runtime::GptJsonConfig::getModelConfigMutable (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv">tensorrt_llm::runtime::GptJsonConfig::getName (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv">tensorrt_llm::runtime::GptJsonConfig::getPrecision (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig18getRuntimeDefaultsEv">tensorrt_llm::runtime::GptJsonConfig::getRuntimeDefaults (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv">tensorrt_llm::runtime::GptJsonConfig::getVersion (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv">tensorrt_llm::runtime::GptJsonConfig::getWorldSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE10SizeType3210SizeType3210SizeType3210SizeType3211ModelConfigNSt8optionalI15RuntimeDefaultsEE">tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig19mContextParallelismE">tensorrt_llm::runtime::GptJsonConfig::mContextParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mGpusPerNodeE">tensorrt_llm::runtime::GptJsonConfig::mGpusPerNode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mModelConfigE">tensorrt_llm::runtime::GptJsonConfig::mModelConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE">tensorrt_llm::runtime::GptJsonConfig::mName (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE">tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE">tensorrt_llm::runtime::GptJsonConfig::mPrecision (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig16mRuntimeDefaultsE">tensorrt_llm::runtime::GptJsonConfig::mRuntimeDefaults (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE">tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE">tensorrt_llm::runtime::GptJsonConfig::mVersion (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE">tensorrt_llm::runtime::GptJsonConfig::parse (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferE">tensorrt_llm::runtime::IBuffer (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">tensorrt_llm::runtime::IBuffer::data (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv">[3]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE">tensorrt_llm::runtime::IBuffer::DataType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv">tensorrt_llm::runtime::IBuffer::getCapacity (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv">tensorrt_llm::runtime::IBuffer::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer15getDataTypeNameE8DataType">tensorrt_llm::runtime::IBuffer::getDataTypeName (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv">tensorrt_llm::runtime::IBuffer::getMemoryType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv">tensorrt_llm::runtime::IBuffer::getMemoryTypeName (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv">tensorrt_llm::runtime::IBuffer::getSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv">tensorrt_llm::runtime::IBuffer::getSizeInBytes (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer">tensorrt_llm::runtime::IBuffer::IBuffer (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv">tensorrt_llm::runtime::IBuffer::memoryType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer">tensorrt_llm::runtime::IBuffer::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv">tensorrt_llm::runtime::IBuffer::release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE">tensorrt_llm::runtime::IBuffer::resize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE">tensorrt_llm::runtime::IBuffer::SharedConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE">tensorrt_llm::runtime::IBuffer::SharedPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE">[3]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE">tensorrt_llm::runtime::IBuffer::toBytes (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE">tensorrt_llm::runtime::IBuffer::UniqueConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE">tensorrt_llm::runtime::IBuffer::UniquePtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE">tensorrt_llm::runtime::IBuffer::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE">[4]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev">tensorrt_llm::runtime::IBuffer::~IBuffer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderE">tensorrt_llm::runtime::IGptDecoder (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERKN8executor12DecodingModeEN8nvinfer18DataTypeE6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrERKNSt10shared_ptrIK25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::IGptDecoder::create (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder16disableLookaheadERKNSt8optionalI14SamplingConfigEE10SizeType3214TensorConstPtr">tensorrt_llm::runtime::IGptDecoder::disableLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forwardAsync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forwardSync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::IGptDecoder::getSamplingConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEENSt8optionalIN8nvinfer18DataTypeEEERKNSt8optionalINSt6vectorI14TensorConstPtrEEEERKNSt8optionalINSt6vectorIN8executor23LookaheadDecodingConfigEEEEE">tensorrt_llm::runtime::IGptDecoder::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder14TensorConstPtrE">tensorrt_llm::runtime::IGptDecoder::TensorConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE">tensorrt_llm::runtime::IGptDecoder::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev">tensorrt_llm::runtime::IGptDecoder::~IGptDecoder (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatchedE">tensorrt_llm::runtime::IGptDecoderBatched (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13CudaStreamPtrE">tensorrt_llm::runtime::IGptDecoderBatched::CudaStreamPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched16disableLookaheadERK13RequestVectorRK9TensorPtr">tensorrt_llm::runtime::IGptDecoderBatched::disableLookahead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched8finalizeERKN7decoder12DecoderStateE10SizeType32RK14SamplingConfigb">tensorrt_llm::runtime::IGptDecoderBatched::finalize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched7forwardERKN7decoder12DecoderStateERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatched::forward (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched12forwardAsyncERKN7decoder12DecoderStateERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatched::forwardAsync (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched18IGptDecoderBatchedEv">tensorrt_llm::runtime::IGptDecoderBatched::IGptDecoderBatched (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13LlmRequestPtrE">tensorrt_llm::runtime::IGptDecoderBatched::LlmRequestPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13RequestVectorE">tensorrt_llm::runtime::IGptDecoderBatched::RequestVector (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched5setupERKN8executor12DecodingModeE10SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfig">tensorrt_llm::runtime::IGptDecoderBatched::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched9TensorPtrE">tensorrt_llm::runtime::IGptDecoderBatched::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatchedD0Ev">tensorrt_llm::runtime::IGptDecoderBatched::~IGptDecoderBatched (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryE">tensorrt_llm::runtime::IpcMemory (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfig">tensorrt_llm::runtime::IpcMemory::allocateIpcMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9BufferPtrE">tensorrt_llm::runtime::IpcMemory::BufferPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv">tensorrt_llm::runtime::IpcMemory::destroyIpcMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE">tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9IpcMemory11getCommPtrsEv">tensorrt_llm::runtime::IpcMemory::getCommPtrs (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfigb">tensorrt_llm::runtime::IpcMemory::IpcMemory (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK9IpcMemory">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERR9IpcMemory">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory7mBufferE">tensorrt_llm::runtime::IpcMemory::mBuffer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE">tensorrt_llm::runtime::IpcMemory::mCommPtrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory8mOpenIpcE">tensorrt_llm::runtime::IpcMemory::mOpenIpc (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory7mTpRankE">tensorrt_llm::runtime::IpcMemory::mTpRank (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERK9IpcMemory">tensorrt_llm::runtime::IpcMemory::operator= (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERR9IpcMemory">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev">tensorrt_llm::runtime::IpcMemory::~IpcMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15ipcNvlsAllocateE6size_tNSt3setIiEE">tensorrt_llm::runtime::ipcNvlsAllocate (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ipcNvlsFreeEP13IpcNvlsHandle">tensorrt_llm::runtime::ipcNvlsFree (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandleE">tensorrt_llm::runtime::IpcNvlsHandle (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle14ipc_uc_handlesE">tensorrt_llm::runtime::IpcNvlsHandle::ipc_uc_handles (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle11ipc_uc_ptrsE">tensorrt_llm::runtime::IpcNvlsHandle::ipc_uc_ptrs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle10ipc_uc_vasE">tensorrt_llm::runtime::IpcNvlsHandle::ipc_uc_vas (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle9mc_handleE">tensorrt_llm::runtime::IpcNvlsHandle::mc_handle (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle6mc_ptrE">tensorrt_llm::runtime::IpcNvlsHandle::mc_ptr (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle5mc_vaE">tensorrt_llm::runtime::IpcNvlsHandle::mc_va (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle4sizeE">tensorrt_llm::runtime::IpcNvlsHandle::size (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle9uc_handleE">tensorrt_llm::runtime::IpcNvlsHandle::uc_handle (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle6uc_ptrE">tensorrt_llm::runtime::IpcNvlsHandle::uc_ptr (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle5uc_vaE">tensorrt_llm::runtime::IpcNvlsHandle::uc_va (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16ipcNvlsSupportedEv">tensorrt_llm::runtime::ipcNvlsSupported (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorE">tensorrt_llm::runtime::ITensor (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atE14UniqueConstPtrRR9TConstPtrRK5Shape">tensorrt_llm::runtime::ITensor::at (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atEN7ITensor14UniqueConstPtrERR9TConstPtrRKNSt16initializer_listI9DimType64EE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRKNSt16initializer_listI9DimType64EE">[3]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t">tensorrt_llm::runtime::ITensor::castSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9DimType64E">tensorrt_llm::runtime::ITensor::DimType64 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8flattenNE9SharedPtrNSt7int64_tE">tensorrt_llm::runtime::ITensor::flattenN (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_10SizeType32ENK12tensorrt_llm7runtime7ITensor12getDimensionE9DimType64v">tensorrt_llm::runtime::ITensor::getDimension (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv">tensorrt_llm::runtime::ITensor::getShape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor">tensorrt_llm::runtime::ITensor::ITensor (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI9DimType64EE">tensorrt_llm::runtime::ITensor::makeShape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor">tensorrt_llm::runtime::ITensor::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape">tensorrt_llm::runtime::ITensor::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE">tensorrt_llm::runtime::ITensor::resize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE">tensorrt_llm::runtime::ITensor::Shape (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T10SizeType32">tensorrt_llm::runtime::ITensor::shapeEquals (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T10SizeType32">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI10SizeType32EE">[4]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE">tensorrt_llm::runtime::ITensor::SharedConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE">tensorrt_llm::runtime::ITensor::SharedPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::ITensor::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5ShapeNSt6size_tE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EENSt6size_tE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape9DimType64">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE9DimType64">[11]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE10SizeType32">tensorrt_llm::runtime::ITensor::squeeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7stridesERK5Shape">tensorrt_llm::runtime::ITensor::strides (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9TensorMapE">tensorrt_llm::runtime::ITensor::TensorMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape">tensorrt_llm::runtime::ITensor::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE">tensorrt_llm::runtime::ITensor::UniqueConstPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE">tensorrt_llm::runtime::ITensor::UniquePtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE10SizeType32">tensorrt_llm::runtime::ITensor::unsqueeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape">tensorrt_llm::runtime::ITensor::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape">tensorrt_llm::runtime::ITensor::volume (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape">tensorrt_llm::runtime::ITensor::volumeNonNegative (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape">tensorrt_llm::runtime::ITensor::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE">[4]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev">tensorrt_llm::runtime::ITensor::~ITensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20lamportInitializeAllEPvPvPv6size_t">tensorrt_llm::runtime::lamportInitializeAll (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime12LocalCreatorE">tensorrt_llm::runtime::LocalCreator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12LocalCreator6createEv">tensorrt_llm::runtime::LocalCreator::create (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12LocalCreator12LocalCreatorERK19CUmemAllocationProp6size_t">tensorrt_llm::runtime::LocalCreator::LocalCreator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12LocalCreator5mPropE">tensorrt_llm::runtime::LocalCreator::mProp (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12LocalCreator5mSizeE">tensorrt_llm::runtime::LocalCreator::mSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12LocalCreator7releaseE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::LocalCreator::release (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffersE">tensorrt_llm::runtime::LookaheadDecodingBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers17generationLengthsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::generationLengths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers24LookaheadDecodingBuffersE10SizeType3210SizeType32RK13BufferManager">tensorrt_llm::runtime::LookaheadDecodingBuffers::LookaheadDecodingBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11packedMasksE">tensorrt_llm::runtime::LookaheadDecodingBuffers::packedMasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11positionIdsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::positionIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers15positionOffsetsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::positionOffsets (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers9TensorPtrE">tensorrt_llm::runtime::LookaheadDecodingBuffers::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModuleE">tensorrt_llm::runtime::LookaheadModule (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15LookaheadModule18getExecutionConfigEv">tensorrt_llm::runtime::LookaheadModule::getExecutionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleE10SizeType3210SizeType32">tensorrt_llm::runtime::LookaheadModule::LookaheadModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule16mExecutionConfigE">tensorrt_llm::runtime::LookaheadModule::mExecutionConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule18setExecutionConfigERKN8executor23LookaheadDecodingConfigE">tensorrt_llm::runtime::LookaheadModule::setExecutionConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffersE">tensorrt_llm::runtime::LookaheadRuntimeBuffers (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18batchSlotsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::batchSlotsHostCopy (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers12cumSumLengthE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::cumSumLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers24disableLookaheadDecodingEv">tensorrt_llm::runtime::LookaheadRuntimeBuffers::disableLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23enableLookaheadDecodingE10SizeType3210SizeType32">tensorrt_llm::runtime::LookaheadRuntimeBuffers::enableLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23generationLengthsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21generationLengthsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers25generationLengthsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsHostCopy (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers18insertInputTensorsER9TensorMapR9TensorMapRK11WorldConfig">tensorrt_llm::runtime::LookaheadRuntimeBuffers::insertInputTensors (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23LookaheadRuntimeBuffersE10SizeType3210SizeType32RK13BufferManagerRK11ModelConfigRK11WorldConfigRKN8executor14DecodingConfigERK11TllmRuntime">tensorrt_llm::runtime::LookaheadRuntimeBuffers::LookaheadRuntimeBuffers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers14packedMaskHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMaskHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18packedMaskHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMaskHostCopy (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17packedMasksDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMasksDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17positionIdsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers15positionIdsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionIdsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsHostCopy (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21positionOffsetsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionOffsetsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsHost (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23positionOffsetsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsHostCopy (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers7reshapeE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LookaheadRuntimeBuffers::reshape (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers13setFromInputsE10SizeType3210SizeType32RK7ITensorRK7ITensorRK24LookaheadDecodingBuffersRK11TllmRuntimeRK11ModelConfigRK11WorldConfig">tensorrt_llm::runtime::LookaheadRuntimeBuffers::setFromInputs (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorMapE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::TensorMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorPtrE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers15useSpecDecodingE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::useSpecDecoding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCacheE">tensorrt_llm::runtime::LoraCache (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType">tensorrt_llm::runtime::LoraCache::bump (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType">tensorrt_llm::runtime::LoraCache::bumpTaskInProgress (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE10SizeType32">tensorrt_llm::runtime::LoraCache::claimPagesWithEvict (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb">tensorrt_llm::runtime::LoraCache::copyTask (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache">tensorrt_llm::runtime::LoraCache::copyTaskMapPages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK11ModelConfigRK11WorldConfigNSt13unordered_mapI10SizeType3210LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCache::copyToPages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType">tensorrt_llm::runtime::LoraCache::determineNumPages (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr">tensorrt_llm::runtime::LoraCache::fits (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType">tensorrt_llm::runtime::LoraCache::get (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv">tensorrt_llm::runtime::LoraCache::getNumPages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t">tensorrt_llm::runtime::LoraCache::getPagePtr (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType">tensorrt_llm::runtime::LoraCache::getStatus (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType">tensorrt_llm::runtime::LoraCache::has (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::isDone (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType">tensorrt_llm::runtime::LoraCache::isLoaded (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr">tensorrt_llm::runtime::LoraCache::loadWeights (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::LoraCache::LoraCache (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv">tensorrt_llm::runtime::LoraCache::markAllDone (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::markTaskDone (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE">tensorrt_llm::runtime::LoraCache::mBufferManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE">tensorrt_llm::runtime::LoraCache::mCacheMap (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE">tensorrt_llm::runtime::LoraCache::mCacheMutex (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE">tensorrt_llm::runtime::LoraCache::mCachePageManager (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE">tensorrt_llm::runtime::LoraCache::mDeviceBufferManagers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE">tensorrt_llm::runtime::LoraCache::mDoneTasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE">tensorrt_llm::runtime::LoraCache::mInProgressTasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE">tensorrt_llm::runtime::LoraCache::mModelConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE">tensorrt_llm::runtime::LoraCache::mModuleIdToModule (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE">tensorrt_llm::runtime::LoraCache::mPageManagerConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE">tensorrt_llm::runtime::LoraCache::mPagesMutex (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE">tensorrt_llm::runtime::LoraCache::mWorldConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb">tensorrt_llm::runtime::LoraCache::put (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor10SizeType3210SizeType32">tensorrt_llm::runtime::LoraCache::splitTransposeCpu (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor10SizeType3210SizeType32">tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE">tensorrt_llm::runtime::LoraCache::TaskIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::adapterSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::inSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::layerId (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::moduleId (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::numSlots (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::outSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::pageId (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17scalingVecPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::scalingVecPointer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::slotIdx (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsInPointer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsOutPointer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfigListPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE">tensorrt_llm::runtime::LoraCache::TaskValue (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE">tensorrt_llm::runtime::LoraCache::TaskValue::configs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE">tensorrt_llm::runtime::LoraCache::TaskValue::done (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::inProgress (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE">tensorrt_llm::runtime::LoraCache::TaskValue::it (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE">tensorrt_llm::runtime::LoraCache::TaskValue::loaded (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::loadInProgress (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue">tensorrt_llm::runtime::LoraCache::TaskValue::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE">tensorrt_llm::runtime::LoraCache::TaskValue::pageIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb">tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev">tensorrt_llm::runtime::LoraCache::TaskValue::~TaskValue (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE">tensorrt_llm::runtime::LoraCache::TaskValuePtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE">tensorrt_llm::runtime::LoraCache::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE">tensorrt_llm::runtime::LoraCache::ValueStatus (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_LOADED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_MISSING (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_PROCESSING (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionE">tensorrt_llm::runtime::LoraCacheFullException (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullException22LoraCacheFullExceptionERKNSt6stringE">tensorrt_llm::runtime::LoraCacheFullException::LoraCacheFullException (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionD0Ev">tensorrt_llm::runtime::LoraCacheFullException::~LoraCacheFullException (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE">tensorrt_llm::runtime::LoraCachePageManager (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE10SizeType32">tensorrt_llm::runtime::LoraCachePageManager::blockPtr (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE10SizeType32">tensorrt_llm::runtime::LoraCachePageManager::claimPages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::initialize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE">tensorrt_llm::runtime::LoraCachePageManager::mConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE">tensorrt_llm::runtime::LoraCachePageManager::mFreePageIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE">tensorrt_llm::runtime::LoraCachePageManager::mIsPageFree (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE">tensorrt_llm::runtime::LoraCachePageManager::mPageBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::mutablePagePtr (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv">tensorrt_llm::runtime::LoraCachePageManager::numAvailablePages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::pagePtr (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCachePageManager::releasePages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE">tensorrt_llm::runtime::LoraCachePageManager::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE">tensorrt_llm::runtime::LoraCachePageManagerConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getInitToZero (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMaxPagesPerBlock (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMemoryType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getNumCopyStreams (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getPageWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getSlotsPerPage (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getTotalNumPages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mDataType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mInitToZero (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMaxPagesPerBlock (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMemoryType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mNumCopyStreams (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mPageWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mSlotsPerPage (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mTotalNumPages (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb">tensorrt_llm::runtime::LoraCachePageManagerConfig::setInitToZero (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMaxPagesPerBlock (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMemoryType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setNumCopyStreams (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setPageWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setSlotsPerPage (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setTotalNumPage (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionE">tensorrt_llm::runtime::LoraExpectedException (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedException21LoraExpectedExceptionERKNSt6stringE">tensorrt_llm::runtime::LoraExpectedException::LoraExpectedException (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionD0Ev">tensorrt_llm::runtime::LoraExpectedException::~LoraExpectedException (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleE">tensorrt_llm::runtime::LoraModule (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::createLoraModules (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE10SizeType32b">tensorrt_llm::runtime::LoraModule::flattenedInOutSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv">tensorrt_llm::runtime::LoraModule::inDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv">tensorrt_llm::runtime::LoraModule::inDimFirst (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE10SizeType32">tensorrt_llm::runtime::LoraModule::inSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv">tensorrt_llm::runtime::LoraModule::inTpSplitDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInAdapterSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE10SizeType32">tensorrt_llm::runtime::LoraModule::localInDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInOutSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localOutAdapterSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE10SizeType32">tensorrt_llm::runtime::LoraModule::localOutDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localOutSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule15localScalesSizeE10SizeType32b">tensorrt_llm::runtime::LoraModule::localScalesSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule14localTotalSizeE10SizeType3210SizeType32b">tensorrt_llm::runtime::LoraModule::localTotalSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule">tensorrt_llm::runtime::LoraModule::LoraModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType10SizeType3210SizeType32bb10SizeType3210SizeType32">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE">tensorrt_llm::runtime::LoraModule::mInDim (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE">tensorrt_llm::runtime::LoraModule::mInDimFirst (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE">tensorrt_llm::runtime::LoraModule::mInTpSplitDim (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE">tensorrt_llm::runtime::LoraModule::ModuleType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_DENSE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_K (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_Q (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_QKV (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_V (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_DENSE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_K (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_Q (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_QKV (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_V (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE">tensorrt_llm::runtime::LoraModule::ModuleType::kINVALID (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_4H_TO_H (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_GATE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_GATE_UPE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_GATE_UP (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_H_TO_4H (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMLP_ROUTERE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_ROUTER (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_4H_TO_HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_4H_TO_H (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMOE_GATEE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_GATE (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_H_TO_4HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_H_TO_4H (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMOE_ROUTERE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_ROUTER (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE">tensorrt_llm::runtime::LoraModule::mOutDim (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE">tensorrt_llm::runtime::LoraModule::mOutDimFirst (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE">tensorrt_llm::runtime::LoraModule::mOutTpSplitDim (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE">tensorrt_llm::runtime::LoraModule::mType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv">tensorrt_llm::runtime::LoraModule::name (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule">tensorrt_llm::runtime::LoraModule::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv">tensorrt_llm::runtime::LoraModule::outDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv">tensorrt_llm::runtime::LoraModule::outDimFirst (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE10SizeType32">tensorrt_llm::runtime::LoraModule::outSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv">tensorrt_llm::runtime::LoraModule::outTpSplitDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE">tensorrt_llm::runtime::LoraModule::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType">tensorrt_llm::runtime::LoraModule::toModuleName (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE">tensorrt_llm::runtime::LoraModule::toModuleType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv">tensorrt_llm::runtime::LoraModule::value (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14LoraTaskIdTypeE">tensorrt_llm::runtime::LoraTaskIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModuleE">tensorrt_llm::runtime::MedusaModule (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12MedusaModule16getMedusaChoicesEv">tensorrt_llm::runtime::MedusaModule::getMedusaChoices (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule21mDefaultMedusaChoicesE">tensorrt_llm::runtime::MedusaModule::mDefaultMedusaChoices (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule13MedusaChoicesE">tensorrt_llm::runtime::MedusaModule::MedusaChoices (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleE10SizeType3210SizeType32">tensorrt_llm::runtime::MedusaModule::MedusaModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule9TensorPtrE">tensorrt_llm::runtime::MedusaModule::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCountersE">tensorrt_llm::runtime::MemoryCounters (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv10SizeType32">tensorrt_llm::runtime::MemoryCounters::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE10SizeType32i">tensorrt_llm::runtime::MemoryCounters::bytesToString (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv10SizeType32">tensorrt_llm::runtime::MemoryCounters::deallocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType10SizeType32">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE">tensorrt_llm::runtime::MemoryCounters::DiffType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv">tensorrt_llm::runtime::MemoryCounters::getCpu (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getCpuDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv">tensorrt_llm::runtime::MemoryCounters::getGpu (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getGpuDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv">tensorrt_llm::runtime::MemoryCounters::getInstance (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv">tensorrt_llm::runtime::MemoryCounters::getPinned (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv">tensorrt_llm::runtime::MemoryCounters::getPinnedDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedPoolEv">tensorrt_llm::runtime::MemoryCounters::getPinnedPool (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters17getPinnedPoolDiffEv">tensorrt_llm::runtime::MemoryCounters::getPinnedPoolDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv">tensorrt_llm::runtime::MemoryCounters::getUVM (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv">tensorrt_llm::runtime::MemoryCounters::getUVMDiff (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE">tensorrt_llm::runtime::MemoryCounters::mCpu (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE">tensorrt_llm::runtime::MemoryCounters::mCpuDiff (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv">tensorrt_llm::runtime::MemoryCounters::MemoryCounters (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE">tensorrt_llm::runtime::MemoryCounters::mGpu (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE">tensorrt_llm::runtime::MemoryCounters::mGpuDiff (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE">tensorrt_llm::runtime::MemoryCounters::mPinned (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE">tensorrt_llm::runtime::MemoryCounters::mPinnedDiff (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedPoolE">tensorrt_llm::runtime::MemoryCounters::mPinnedPool (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters15mPinnedPoolDiffE">tensorrt_llm::runtime::MemoryCounters::mPinnedPoolDiff (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME">tensorrt_llm::runtime::MemoryCounters::mUVM (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE">tensorrt_llm::runtime::MemoryCounters::mUVMDiff (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters10SizeType32E">tensorrt_llm::runtime::MemoryCounters::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv">tensorrt_llm::runtime::MemoryCounters::toString (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryTypeE">tensorrt_llm::runtime::MemoryType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE">tensorrt_llm::runtime::MemoryType::kCPU (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE">tensorrt_llm::runtime::MemoryType::kGPU (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE">tensorrt_llm::runtime::MemoryType::kPINNED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType11kPINNEDPOOLE">tensorrt_llm::runtime::MemoryType::kPINNEDPOOL (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME">tensorrt_llm::runtime::MemoryType::kUVM (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE">tensorrt_llm::runtime::MemoryTypeString (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEEE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNEDPOOL> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEE5valueE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNEDPOOL>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kUVM> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE">tensorrt_llm::runtime::MemoryTypeString<MemoryType::kUVM>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfiguratorE">tensorrt_llm::runtime::MemsetConfigurator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator8mAddressE">tensorrt_llm::runtime::MemsetConfigurator::mAddress (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator18MemsetConfiguratorE11CUdeviceptr6size_t7uint8_t8CUstream">tensorrt_llm::runtime::MemsetConfigurator::MemsetConfigurator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator10mFirstTimeE">tensorrt_llm::runtime::MemsetConfigurator::mFirstTime (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator5mSizeE">tensorrt_llm::runtime::MemsetConfigurator::mSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator7mStreamE">tensorrt_llm::runtime::MemsetConfigurator::mStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator6mValueE">tensorrt_llm::runtime::MemsetConfigurator::mValue (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator5setupE28CUmemGenericAllocationHandle">tensorrt_llm::runtime::MemsetConfigurator::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator8teardownE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::MemsetConfigurator::teardown (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfigE">tensorrt_llm::runtime::ModelConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEb">tensorrt_llm::runtime::ModelConfig::computeContextLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEb">tensorrt_llm::runtime::ModelConfig::computeGenerationLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16countLocalLayersE9LayerType10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::countLocalLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20countLowerRankLayersE9LayerType10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::countLowerRankLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig32disableSeamlessLookaheadDecodingEv">tensorrt_llm::runtime::ModelConfig::disableSeamlessLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig31enableSeamlessLookaheadDecodingE10SizeType32">tensorrt_llm::runtime::ModelConfig::enableSeamlessLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getContextFMHAEv">tensorrt_llm::runtime::ModelConfig::getContextFMHA (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getDataTypeEv">tensorrt_llm::runtime::ModelConfig::getDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getEncoderHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getEncoderHiddenSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getFirstLocalLayerE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getFirstLocalLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getGemmAllReduceDtypeEv">tensorrt_llm::runtime::ModelConfig::getGemmAllReduceDtype (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getHiddenSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getKVCacheTypeEv">tensorrt_llm::runtime::ModelConfig::getKVCacheType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getKvDataTypeEv">tensorrt_llm::runtime::ModelConfig::getKvDataType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getLayerTypesEv">tensorrt_llm::runtime::ModelConfig::getLayerTypes (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLogitsDtypeEv">tensorrt_llm::runtime::ModelConfig::getLogitsDtype (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLoraModulesEv">tensorrt_llm::runtime::ModelConfig::getLoraModules (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getManageWeightsTypeEv">tensorrt_llm::runtime::ModelConfig::getManageWeightsType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBatchSizeEv">tensorrt_llm::runtime::ModelConfig::getMaxBatchSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBeamWidthEv">tensorrt_llm::runtime::ModelConfig::getMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig25getMaxDecodingDraftTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxDecodingDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getMaxDecodingTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxDecodingTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMaxEncoderLenEv">tensorrt_llm::runtime::ModelConfig::getMaxEncoderLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxInputLenEv">tensorrt_llm::runtime::ModelConfig::getMaxInputLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxLoraRankEv">tensorrt_llm::runtime::ModelConfig::getMaxLoraRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxNumTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxNumTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig24getMaxPositionEmbeddingsEv">tensorrt_llm::runtime::ModelConfig::getMaxPositionEmbeddings (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig30getMaxPromptEmbeddingTableSizeEv">tensorrt_llm::runtime::ModelConfig::getMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getMaxSequenceLenEv">tensorrt_llm::runtime::ModelConfig::getMaxSequenceLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMlpHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getMlpHiddenSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getModelNameEv">tensorrt_llm::runtime::ModelConfig::getModelName (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getModelVariantEv">tensorrt_llm::runtime::ModelConfig::getModelVariant (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getNbAttentionLayersE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getNbAttentionLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig10getNbHeadsEv">tensorrt_llm::runtime::ModelConfig::getNbHeads (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getNbKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::getNbKvHeads (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getNbLayersE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getNbLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getNbRnnLayersE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getNbRnnLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig27getNumKvHeadsForGivenLayersERKNSt6vectorI10SizeType32EEb">tensorrt_llm::runtime::ModelConfig::getNumKvHeadsForGivenLayers (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getNumKvHeadsPerLayerEv">tensorrt_llm::runtime::ModelConfig::getNumKvHeadsPerLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getNumKvHeadsPerLayerLocalRangeE10SizeType3210SizeType32b">tensorrt_llm::runtime::ModelConfig::getNumKvHeadsPerLayerLocalRange (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getNumLanguagesEv">tensorrt_llm::runtime::ModelConfig::getNumLanguages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig25getOptProfilesSplitPointsEv">tensorrt_llm::runtime::ModelConfig::getOptProfilesSplitPoints (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19getPagedContextFMHAEv">tensorrt_llm::runtime::ModelConfig::getPagedContextFMHA (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getPpReduceScatterEv">tensorrt_llm::runtime::ModelConfig::getPpReduceScatter (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getQuantModeEv">tensorrt_llm::runtime::ModelConfig::getQuantMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getRnnConfigEv">tensorrt_llm::runtime::ModelConfig::getRnnConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getRotaryEmbeddingDimEv">tensorrt_llm::runtime::ModelConfig::getRotaryEmbeddingDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getSizePerHeadEv">tensorrt_llm::runtime::ModelConfig::getSizePerHead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig26getSpeculativeDecodingModeEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig28getSpeculativeDecodingModuleEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingModule (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingModulePtr (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getTokensPerBlockEv">tensorrt_llm::runtime::ModelConfig::getTokensPerBlock (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getVocabSizeEv">tensorrt_llm::runtime::ModelConfig::getVocabSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getVocabSizePaddedE10SizeType32">tensorrt_llm::runtime::ModelConfig::getVocabSizePadded (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12hasRnnConfigEv">tensorrt_llm::runtime::ModelConfig::hasRnnConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig28hasSpeculativeDecodingModuleEv">tensorrt_llm::runtime::ModelConfig::hasSpeculativeDecodingModule (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19isContinuousKVCacheEv">tensorrt_llm::runtime::ModelConfig::isContinuousKVCache (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16isKVCacheEnabledEv">tensorrt_llm::runtime::ModelConfig::isKVCacheEnabled (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12isMultiModalEv">tensorrt_llm::runtime::ModelConfig::isMultiModal (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14isPagedKVCacheEv">tensorrt_llm::runtime::ModelConfig::isPagedKVCache (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig10isRnnBasedEv">tensorrt_llm::runtime::ModelConfig::isRnnBased (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18isTransformerBasedEv">tensorrt_llm::runtime::ModelConfig::isTransformerBased (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig9isWhisperEv">tensorrt_llm::runtime::ModelConfig::isWhisper (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig29kDEFAULT_NUM_TOKENS_PER_BLOCKE">tensorrt_llm::runtime::ModelConfig::kDEFAULT_NUM_TOKENS_PER_BLOCK (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26kOPT_PROFILES_SPLIT_POINTSE">tensorrt_llm::runtime::ModelConfig::kOPT_PROFILES_SPLIT_POINTS (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheTypeE">tensorrt_llm::runtime::ModelConfig::KVCacheType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType11kCONTINUOUSE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kCONTINUOUS (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType9kDISABLEDE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kDISABLED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType6kPAGEDE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kPAGED (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21KVCacheTypeFromStringENSt6stringE">tensorrt_llm::runtime::ModelConfig::KVCacheTypeFromString (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerTypeE">tensorrt_llm::runtime::ModelConfig::LayerType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kATTENTIONE">tensorrt_llm::runtime::ModelConfig::LayerType::kATTENTION (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType7kLINEARE">tensorrt_llm::runtime::ModelConfig::LayerType::kLINEAR (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType5kNOOPE">tensorrt_llm::runtime::ModelConfig::LayerType::kNOOP (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kRECURRENTE">tensorrt_llm::runtime::ModelConfig::LayerType::kRECURRENT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsTypeE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType9kDisabledE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType::kDisabled (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType8kEnabledE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType::kEnabled (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mComputeContextLogitsE">tensorrt_llm::runtime::ModelConfig::mComputeContextLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24mComputeGenerationLogitsE">tensorrt_llm::runtime::ModelConfig::mComputeGenerationLogits (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mContextFMHAE">tensorrt_llm::runtime::ModelConfig::mContextFMHA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mDataTypeE">tensorrt_llm::runtime::ModelConfig::mDataType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mEncoderHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mEncoderHiddenSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig19mGemmAllReduceDtypeE">tensorrt_llm::runtime::ModelConfig::mGemmAllReduceDtype (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mHiddenSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mInputPackedE">tensorrt_llm::runtime::ModelConfig::mInputPacked (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mKVCacheTypeE">tensorrt_llm::runtime::ModelConfig::mKVCacheType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mLayerTypesE">tensorrt_llm::runtime::ModelConfig::mLayerTypes (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLogitsDtypeE">tensorrt_llm::runtime::ModelConfig::mLogitsDtype (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLoraModulesE">tensorrt_llm::runtime::ModelConfig::mLoraModules (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mManageWeightsTypeE">tensorrt_llm::runtime::ModelConfig::mManageWeightsType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBatchSizeE">tensorrt_llm::runtime::ModelConfig::mMaxBatchSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBeamWidthE">tensorrt_llm::runtime::ModelConfig::mMaxBeamWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMaxEncoderLenE">tensorrt_llm::runtime::ModelConfig::mMaxEncoderLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxInputLenE">tensorrt_llm::runtime::ModelConfig::mMaxInputLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxLoraRankE">tensorrt_llm::runtime::ModelConfig::mMaxLoraRank (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxNumTokensE">tensorrt_llm::runtime::ModelConfig::mMaxNumTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mMaxPositionEmbeddingsE">tensorrt_llm::runtime::ModelConfig::mMaxPositionEmbeddings (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28mMaxPromptEmbeddingTableSizeE">tensorrt_llm::runtime::ModelConfig::mMaxPromptEmbeddingTableSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15mMaxSequenceLenE">tensorrt_llm::runtime::ModelConfig::mMaxSequenceLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMlpHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mMlpHiddenSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mModelNameE">tensorrt_llm::runtime::ModelConfig::mModelName (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mModelVariantE">tensorrt_llm::runtime::ModelConfig::mModelVariant (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mNbAttentionLayersE">tensorrt_llm::runtime::ModelConfig::mNbAttentionLayers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig8mNbHeadsE">tensorrt_llm::runtime::ModelConfig::mNbHeads (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mNbLayersE">tensorrt_llm::runtime::ModelConfig::mNbLayers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mNbRnnLayersE">tensorrt_llm::runtime::ModelConfig::mNbRnnLayers (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28mNumKvHeadsPerAttentionLayerE">tensorrt_llm::runtime::ModelConfig::mNumKvHeadsPerAttentionLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig33mNumKvHeadsPerCrossAttentionLayerE">tensorrt_llm::runtime::ModelConfig::mNumKvHeadsPerCrossAttentionLayer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mNumLanguagesE">tensorrt_llm::runtime::ModelConfig::mNumLanguages (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11ModelConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeE">tensorrt_llm::runtime::ModelConfig::ModelConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariantE">tensorrt_llm::runtime::ModelConfig::ModelVariant (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant8kChatGlmE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kChatGlm (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant7kEncDecE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kEncDec (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGlmE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kGlm (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGptE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kGpt (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant6kMambaE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kMamba (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant15kRecurrentGemmaE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kRecurrentGemma (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17mPagedContextFMHAE">tensorrt_llm::runtime::ModelConfig::mPagedContextFMHA (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mPagedStateE">tensorrt_llm::runtime::ModelConfig::mPagedState (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16mPpReduceScatterE">tensorrt_llm::runtime::ModelConfig::mPpReduceScatter (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mQuantModeE">tensorrt_llm::runtime::ModelConfig::mQuantMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mRnnConfigE">tensorrt_llm::runtime::ModelConfig::mRnnConfig (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig19mRotaryEmbeddingDimE">tensorrt_llm::runtime::ModelConfig::mRotaryEmbeddingDim (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mSizePerHeadE">tensorrt_llm::runtime::ModelConfig::mSizePerHead (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20mSkipCrossAttnBlocksE">tensorrt_llm::runtime::ModelConfig::mSkipCrossAttnBlocks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24mSpeculativeDecodingModeE">tensorrt_llm::runtime::ModelConfig::mSpeculativeDecodingMode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26mSpeculativeDecodingModuleE">tensorrt_llm::runtime::ModelConfig::mSpeculativeDecodingModule (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15mTokensPerBlockE">tensorrt_llm::runtime::ModelConfig::mTokensPerBlock (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseCrossAttentionE">tensorrt_llm::runtime::ModelConfig::mUseCrossAttention (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig23mUseGemmAllReducePluginE">tensorrt_llm::runtime::ModelConfig::mUseGemmAllReducePlugin (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseGptAttentionPluginE">tensorrt_llm::runtime::ModelConfig::mUseGptAttentionPlugin (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mUseLoraPluginE">tensorrt_llm::runtime::ModelConfig::mUseLoraPlugin (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUseMambaConv1dPluginE">tensorrt_llm::runtime::ModelConfig::mUseMambaConv1dPlugin (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mUseMropeE">tensorrt_llm::runtime::ModelConfig::mUseMrope (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUsePositionEmbeddingE">tensorrt_llm::runtime::ModelConfig::mUsePositionEmbedding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseShapeInferenceE">tensorrt_llm::runtime::ModelConfig::mUseShapeInference (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseTokenTypeEmbeddingE">tensorrt_llm::runtime::ModelConfig::mUseTokenTypeEmbedding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mVocabSizeE">tensorrt_llm::runtime::ModelConfig::mVocabSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig30resetSpeculativeDecodingModuleEv">tensorrt_llm::runtime::ModelConfig::resetSpeculativeDecodingModule (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfigE">tensorrt_llm::runtime::ModelConfig::RnnConfig (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig10convKernelE">tensorrt_llm::runtime::ModelConfig::RnnConfig::convKernel (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig14rnnConvDimSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnConvDimSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig11rnnHeadSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnHeadSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig13rnnHiddenSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnHiddenSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig9stateSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::stateSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setContextFMHAEb">tensorrt_llm::runtime::ModelConfig::setContextFMHA (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setEncoderHiddenSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setEncoderHiddenSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21setGemmAllReduceDtypeEN8nvinfer18DataTypeE">tensorrt_llm::runtime::ModelConfig::setGemmAllReduceDtype (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setKVCacheTypeE11KVCacheType">tensorrt_llm::runtime::ModelConfig::setKVCacheType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13setLayerTypesERKNSt6vectorI9LayerTypeEE">tensorrt_llm::runtime::ModelConfig::setLayerTypes (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLogitsDtypeEN8nvinfer18DataTypeE">tensorrt_llm::runtime::ModelConfig::setLogitsDtype (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE">tensorrt_llm::runtime::ModelConfig::setLoraModules (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setManageWeightsTypeEK17ManageWeightsType">tensorrt_llm::runtime::ModelConfig::setManageWeightsType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBatchSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxBatchSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBeamWidthE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMaxEncoderLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxEncoderLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxInputLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxInputLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxLoraRankE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxLoraRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxNumTokensENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setMaxNumTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24setMaxPositionEmbeddingsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxPositionEmbeddings (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig30setMaxPromptEmbeddingTableSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setMaxSequenceLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxSequenceLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMlpHiddenSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMlpHiddenSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setModelNameERKNSt6stringE">tensorrt_llm::runtime::ModelConfig::setModelName (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setModelVariantE12ModelVariant">tensorrt_llm::runtime::ModelConfig::setModelVariant (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setNbCrossKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setNbCrossKvHeads (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setNbKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setNbKvHeads (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26setNumKvHeadsPerCrossLayerERKNSt6vectorI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setNumKvHeadsPerCrossLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21setNumKvHeadsPerLayerERKNSt6vectorI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setNumKvHeadsPerLayer (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setNumLanguagesENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setNumLanguages (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig19setPagedContextFMHAEb">tensorrt_llm::runtime::ModelConfig::setPagedContextFMHA (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18setPpReduceScatterEb">tensorrt_llm::runtime::ModelConfig::setPpReduceScatter (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setQuantModeEN6common9QuantModeE">tensorrt_llm::runtime::ModelConfig::setQuantMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setRnnConfigERK9RnnConfig">tensorrt_llm::runtime::ModelConfig::setRnnConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21setRotaryEmbeddingDimE10SizeType32">tensorrt_llm::runtime::ModelConfig::setRotaryEmbeddingDim (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setSizePerHeadE10SizeType32">tensorrt_llm::runtime::ModelConfig::setSizePerHead (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22setSkipCrossAttnBlocksEb">tensorrt_llm::runtime::ModelConfig::setSkipCrossAttnBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26setSpeculativeDecodingModeE23SpeculativeDecodingMode">tensorrt_llm::runtime::ModelConfig::setSpeculativeDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28setSpeculativeDecodingModuleERKNSt10shared_ptrI25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::ModelConfig::setSpeculativeDecodingModule (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setTokensPerBlockE10SizeType32">tensorrt_llm::runtime::ModelConfig::setTokensPerBlock (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseCrossAttentionEb">tensorrt_llm::runtime::ModelConfig::setUseCrossAttention (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11setUseMropeEb">tensorrt_llm::runtime::ModelConfig::setUseMrope (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig23setUsePositionEmbeddingEb">tensorrt_llm::runtime::ModelConfig::setUsePositionEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseShapeInferenceEb">tensorrt_llm::runtime::ModelConfig::setUseShapeInference (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24setUseTokenTypeEmbeddingEb">tensorrt_llm::runtime::ModelConfig::setUseTokenTypeEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19skipCrossAttnBlocksEv">tensorrt_llm::runtime::ModelConfig::skipCrossAttnBlocks (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig24supportsInflightBatchingEv">tensorrt_llm::runtime::ModelConfig::supportsInflightBatching (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useCrossAttentionEv">tensorrt_llm::runtime::ModelConfig::useCrossAttention (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22useGemmAllReducePluginEb">tensorrt_llm::runtime::ModelConfig::useGemmAllReducePlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig22useGemmAllReducePluginEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEb">tensorrt_llm::runtime::ModelConfig::useGptAttentionPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18useLanguageAdapterEv">tensorrt_llm::runtime::ModelConfig::useLanguageAdapter (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13useLoraPluginEb">tensorrt_llm::runtime::ModelConfig::useLoraPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13useLoraPluginEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEb">tensorrt_llm::runtime::ModelConfig::useMambaConv1dPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig8useMropeEv">tensorrt_llm::runtime::ModelConfig::useMrope (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14usePackedInputEb">tensorrt_llm::runtime::ModelConfig::usePackedInput (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14usePackedInputEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13usePagedStateEb">tensorrt_llm::runtime::ModelConfig::usePagedState (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13usePagedStateEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20usePositionEmbeddingEv">tensorrt_llm::runtime::ModelConfig::usePositionEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15usePromptTuningEv">tensorrt_llm::runtime::ModelConfig::usePromptTuning (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useShapeInferenceEv">tensorrt_llm::runtime::ModelConfig::useShapeInference (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useTokenTypeEmbeddingEv">tensorrt_llm::runtime::ModelConfig::useTokenTypeEmbedding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17MPI_group_barrierENSt3setIiEE">tensorrt_llm::runtime::MPI_group_barrier (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfiguratorE">tensorrt_llm::runtime::MulticastConfigurator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator11mBindOffsetE">tensorrt_llm::runtime::MulticastConfigurator::mBindOffset (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator7mDeviceE">tensorrt_llm::runtime::MulticastConfigurator::mDevice (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator10mMulticastE">tensorrt_llm::runtime::MulticastConfigurator::mMulticast (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator5mSizeE">tensorrt_llm::runtime::MulticastConfigurator::mSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator5setupE28CUmemGenericAllocationHandle">tensorrt_llm::runtime::MulticastConfigurator::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator8teardownE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::MulticastConfigurator::teardown (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfiguratorE">tensorrt_llm::runtime::OffloadConfigurator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator8mAddressE">tensorrt_llm::runtime::OffloadConfigurator::mAddress (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator14mBackedStorageE">tensorrt_llm::runtime::OffloadConfigurator::mBackedStorage (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator9mBackTypeE">tensorrt_llm::runtime::OffloadConfigurator::mBackType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator9mOndemandE">tensorrt_llm::runtime::OffloadConfigurator::mOndemand (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator5mSizeE">tensorrt_llm::runtime::OffloadConfigurator::mSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator7mStreamE">tensorrt_llm::runtime::OffloadConfigurator::mStream (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator19OffloadConfiguratorE11CUdeviceptr6size_t10MemoryType8CUstreamb">tensorrt_llm::runtime::OffloadConfigurator::OffloadConfigurator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator5setupE28CUmemGenericAllocationHandle">tensorrt_llm::runtime::OffloadConfigurator::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator8teardownE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::OffloadConfigurator::teardown (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule">tensorrt_llm::runtime::operator<< (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE">[5]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE">tensorrt_llm::runtime::PointerElementType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE">tensorrt_llm::runtime::PromptTuningParams (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtr10SizeType3210SizeType32RKNSt6vectorI10SizeType32EERKNSt6vectorI10SizeType32EERK13BufferManagerb">tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams10SizeType32E">tensorrt_llm::runtime::PromptTuningParams::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE">tensorrt_llm::runtime::PromptTuningParams::TensorPtr (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngineE">tensorrt_llm::runtime::RawEngine (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine10getAddressEv">tensorrt_llm::runtime::RawEngine::getAddress (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine13getHostMemoryEv">tensorrt_llm::runtime::RawEngine::getHostMemory (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine23getManagedWeightsMapOptEv">tensorrt_llm::runtime::RawEngine::getManagedWeightsMapOpt (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getPathEv">tensorrt_llm::runtime::RawEngine::getPath (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine10getPathOptEv">tensorrt_llm::runtime::RawEngine::getPathOpt (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getSizeEv">tensorrt_llm::runtime::RawEngine::getSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getTypeEv">tensorrt_llm::runtime::RawEngine::getType (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineAddrE">tensorrt_llm::runtime::RawEngine::mEngineAddr (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine13mEngineBufferE">tensorrt_llm::runtime::RawEngine::mEngineBuffer (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEnginePathE">tensorrt_llm::runtime::RawEngine::mEnginePath (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineSizeE">tensorrt_llm::runtime::RawEngine::mEngineSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine18mManagedWeightsMapE">tensorrt_llm::runtime::RawEngine::mManagedWeightsMap (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine5mTypeE">tensorrt_llm::runtime::RawEngine::mType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineENSt10filesystem4pathE">tensorrt_llm::runtime::RawEngine::RawEngine (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKN8nvinfer111IHostMemoryE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKvNSt6size_tE">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine20setManagedWeightsMapENSt3mapINSt6stringEN12tensorrt_llm8executor6TensorEEE">tensorrt_llm::runtime::RawEngine::setManagedWeightsMap (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine7setPathENSt10filesystem4pathE">tensorrt_llm::runtime::RawEngine::setPath (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4TypeE">tensorrt_llm::runtime::RawEngine::Type (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type15AddressWithSizeE">tensorrt_llm::runtime::RawEngine::Type::AddressWithSize (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type8FilePathE">tensorrt_llm::runtime::RawEngine::Type::FilePath (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type10HostMemoryE">tensorrt_llm::runtime::RawEngine::Type::HostMemory (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestTypeE">tensorrt_llm::runtime::RequestType (C++ enum)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestType8kCONTEXTE">tensorrt_llm::runtime::RequestType::kCONTEXT (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestType11kGENERATIONE">tensorrt_llm::runtime::RequestType::kGENERATION (C++ enumerator)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaultsE">tensorrt_llm::runtime::RuntimeDefaults (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults21maxAttentionWindowVecE">tensorrt_llm::runtime::RuntimeDefaults::maxAttentionWindowVec (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::RuntimeDefaults::RuntimeDefaults (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsEv">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15sinkTokenLengthE">tensorrt_llm::runtime::RuntimeDefaults::sinkTokenLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfigE">tensorrt_llm::runtime::SamplingConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE">tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE">tensorrt_llm::runtime::SamplingConfig::beamWidth (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14beamWidthArrayE">tensorrt_llm::runtime::SamplingConfig::beamWidthArray (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig11cumLogProbsE">tensorrt_llm::runtime::SamplingConfig::cumLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE">tensorrt_llm::runtime::SamplingConfig::draftAcceptanceThreshold (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE">tensorrt_llm::runtime::SamplingConfig::earlyStopping (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE">tensorrt_llm::runtime::SamplingConfig::FloatType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE">tensorrt_llm::runtime::SamplingConfig::frequencyPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE6size_tEEE1T">tensorrt_llm::runtime::SamplingConfig::fuseValues (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfig15getMaxBeamWidthEv">tensorrt_llm::runtime::SamplingConfig::getMaxBeamWidth (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfig17getNumReturnBeamsEv">tensorrt_llm::runtime::SamplingConfig::getNumReturnBeams (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE">tensorrt_llm::runtime::SamplingConfig::lengthPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE">tensorrt_llm::runtime::SamplingConfig::minLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4minPE">tensorrt_llm::runtime::SamplingConfig::minP (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17noRepeatNgramSizeE">tensorrt_llm::runtime::SamplingConfig::noRepeatNgramSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE">tensorrt_llm::runtime::SamplingConfig::normalizeLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig18numReturnSequencesE">tensorrt_llm::runtime::SamplingConfig::numReturnSequences (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::runtime::SamplingConfig::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE">tensorrt_llm::runtime::SamplingConfig::OptVec (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig19originalTemperatureE">tensorrt_llm::runtime::SamplingConfig::originalTemperature (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14outputLogProbsE">tensorrt_llm::runtime::SamplingConfig::outputLogProbs (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE">tensorrt_llm::runtime::SamplingConfig::presencePenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig18promptIgnoreLengthE">tensorrt_llm::runtime::SamplingConfig::promptIgnoreLength (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE">tensorrt_llm::runtime::SamplingConfig::randomSeed (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE">tensorrt_llm::runtime::SamplingConfig::repetitionPenalty (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE10SizeType32">tensorrt_llm::runtime::SamplingConfig::SamplingConfig (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25ExternalDraftTokensConfigEEE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE">tensorrt_llm::runtime::SamplingConfig::temperature (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE">tensorrt_llm::runtime::SamplingConfig::topK (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE">tensorrt_llm::runtime::SamplingConfig::topKMedusaHeads (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE">tensorrt_llm::runtime::SamplingConfig::topP (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE">tensorrt_llm::runtime::SamplingConfig::topPDecay (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE">tensorrt_llm::runtime::SamplingConfig::topPMin (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE">tensorrt_llm::runtime::SamplingConfig::topPResetIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig16useDefaultValuesEbRK6OptVecI1TE1T">tensorrt_llm::runtime::SamplingConfig::useDefaultValues (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig8validateEv">tensorrt_llm::runtime::SamplingConfig::validate (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig11validateVecEbNSt6stringERK6OptVecI1TE1TNSt8optionalI1TEE">tensorrt_llm::runtime::SamplingConfig::validateVec (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25setVirtualMemoryAllocatorERKNSt6stringEN26CudaVirtualMemoryAllocator11RestoreModeENSt10shared_ptrI10CudaStreamEE">tensorrt_llm::runtime::setVirtualMemoryAllocator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10SizeType32E">tensorrt_llm::runtime::SizeType32 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10SizeType64E">tensorrt_llm::runtime::SizeType64 (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingModeE">tensorrt_llm::runtime::SpeculativeDecodingMode (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9allBitSetE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::allBitSet (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9anyBitSetE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::anyBitSet (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19DraftTokensExternalEv">tensorrt_llm::runtime::SpeculativeDecodingMode::DraftTokensExternal (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5EagleEv">tensorrt_llm::runtime::SpeculativeDecodingMode::Eagle (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19ExplicitDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::ExplicitDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode14hasDraftLogitsEv">tensorrt_llm::runtime::SpeculativeDecodingMode::hasDraftLogits (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isDraftTokensExternalEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isDraftTokensExternal (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode7isEagleEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isEagle (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isExplicitDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isExplicitDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19isLookaheadDecodingEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isLookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode8isMedusaEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isMedusa (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode6isNoneEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isNone (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kDraftTokensExternalE">tensorrt_llm::runtime::SpeculativeDecodingMode::kDraftTokensExternal (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6kEagleE">tensorrt_llm::runtime::SpeculativeDecodingMode::kEagle (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kExplicitDraftTokensE">tensorrt_llm::runtime::SpeculativeDecodingMode::kExplicitDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode18kLookaheadDecodingE">tensorrt_llm::runtime::SpeculativeDecodingMode::kLookaheadDecoding (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode7kMedusaE">tensorrt_llm::runtime::SpeculativeDecodingMode::kMedusa (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5kNoneE">tensorrt_llm::runtime::SpeculativeDecodingMode::kNone (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode17LookaheadDecodingEv">tensorrt_llm::runtime::SpeculativeDecodingMode::LookaheadDecoding (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6MedusaEv">tensorrt_llm::runtime::SpeculativeDecodingMode::Medusa (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6mStateE">tensorrt_llm::runtime::SpeculativeDecodingMode::mState (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode20needsDecoderPrologueEv">tensorrt_llm::runtime::SpeculativeDecodingMode::needsDecoderPrologue (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18needsKVCacheRewindEv">tensorrt_llm::runtime::SpeculativeDecodingMode::needsKVCacheRewind (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode4NoneEv">tensorrt_llm::runtime::SpeculativeDecodingMode::None (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingModeeqERK23SpeculativeDecodingMode">tensorrt_llm::runtime::SpeculativeDecodingMode::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19predictsDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::predictsDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21requiresAttentionMaskEv">tensorrt_llm::runtime::SpeculativeDecodingMode::requiresAttentionMask (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode23SpeculativeDecodingModeE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::SpeculativeDecodingMode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode14UnderlyingTypeE">tensorrt_llm::runtime::SpeculativeDecodingMode::UnderlyingType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18updatesPositionIdsEv">tensorrt_llm::runtime::SpeculativeDecodingMode::updatesPositionIds (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19variableDraftLengthEv">tensorrt_llm::runtime::SpeculativeDecodingMode::variableDraftLength (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleE">tensorrt_llm::runtime::SpeculativeDecodingModule (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule21computeNumPackedMasksEv">tensorrt_llm::runtime::SpeculativeDecodingModule::computeNumPackedMasks (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule25getMaxDecodingDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDecodingDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule20getMaxDecodingTokensEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDecodingTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule18getMaxDraftPathLenEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDraftPathLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule14getMaxNumPathsEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxNumPaths (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule13getMaxPathLenEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxPathLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule17getNumPackedMasksEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getNumPackedMasks (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule23mMaxDecodingDraftTokensE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxDecodingDraftTokens (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule16mMaxDraftPathLenE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxDraftPathLen (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18mMaxNumPackedMasksE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxNumPackedMasks (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule12mMaxNumPathsE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxNumPaths (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleaSERK25SpeculativeDecodingModule">tensorrt_llm::runtime::SpeculativeDecodingModule::operator= (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18setMaxDraftPathLenE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxDraftPathLen (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule17setMaxDraftTokensE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxDraftTokens (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule14setMaxNumPathsE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxNumPaths (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::SpeculativeDecodingModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleERK25SpeculativeDecodingModule">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleEv">[2]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleD0Ev">tensorrt_llm::runtime::SpeculativeDecodingModule::~SpeculativeDecodingModule (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE">tensorrt_llm::runtime::StringPtrMap (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLoggerE">tensorrt_llm::runtime::TllmLogger (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv">tensorrt_llm::runtime::TllmLogger::getLevel (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE">tensorrt_llm::runtime::TllmLogger::log (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity">tensorrt_llm::runtime::TllmLogger::setLevel (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig">tensorrt_llm::runtime::to_string (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE">[1]</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16TokenExtraIdTypeE">tensorrt_llm::runtime::TokenExtraIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE">tensorrt_llm::runtime::TokenIdType (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE">tensorrt_llm::runtime::TRTDataType (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE">tensorrt_llm::runtime::TRTDataType<bool> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE">tensorrt_llm::runtime::TRTDataType<bool>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE">tensorrt_llm::runtime::TRTDataType<float> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE">tensorrt_llm::runtime::TRTDataType<float>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE">tensorrt_llm::runtime::TRTDataType<half> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE">tensorrt_llm::runtime::TRTDataType<half>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEEE">tensorrt_llm::runtime::TRTDataType<kernels::FinishedState> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEE5valueE">tensorrt_llm::runtime::TRTDataType<kernels::FinishedState>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEEE">tensorrt_llm::runtime::TRTDataType<kernels::KVCacheIndex> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEE5valueE">tensorrt_llm::runtime::TRTDataType<kernels::KVCacheIndex>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7runtime11RequestTypeEEE">tensorrt_llm::runtime::TRTDataType<runtime::RequestType> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7runtime11RequestTypeEE5valueE">tensorrt_llm::runtime::TRTDataType<runtime::RequestType>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE">tensorrt_llm::runtime::TRTDataType<std::int32_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::int32_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE">tensorrt_llm::runtime::TRTDataType<std::int64_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::int64_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE">tensorrt_llm::runtime::TRTDataType<std::int8_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::int8_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE">tensorrt_llm::runtime::TRTDataType<std::uint32_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::uint32_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE">tensorrt_llm::runtime::TRTDataType<std::uint64_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::uint64_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE">tensorrt_llm::runtime::TRTDataType<std::uint8_t> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE">tensorrt_llm::runtime::TRTDataType<std::uint8_t>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE">tensorrt_llm::runtime::TRTDataType<T*> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE">tensorrt_llm::runtime::TRTDataType<T*>::kUnderlyingType (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE">tensorrt_llm::runtime::TRTDataType<T*>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE">tensorrt_llm::runtime::TRTDataType<void*> (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE">tensorrt_llm::runtime::TRTDataType<void*>::value (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfiguratorE">tensorrt_llm::runtime::UnicastConfigurator (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator8mAddressE">tensorrt_llm::runtime::UnicastConfigurator::mAddress (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator5mDescE">tensorrt_llm::runtime::UnicastConfigurator::mDesc (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator5mSizeE">tensorrt_llm::runtime::UnicastConfigurator::mSize (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator5setupE28CUmemGenericAllocationHandle">tensorrt_llm::runtime::UnicastConfigurator::setup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator8teardownE28CUmemGenericAllocationHandleb">tensorrt_llm::runtime::UnicastConfigurator::teardown (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19UnicastConfigurator19UnicastConfiguratorE11CUdeviceptr6size_tRK15CUmemAccessDesc">tensorrt_llm::runtime::UnicastConfigurator::UnicastConfigurator (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueTokenE">tensorrt_llm::runtime::UniqueToken (C++ struct)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11UniqueTokeneqERK11UniqueToken">tensorrt_llm::runtime::UniqueToken::operator== (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueToken12tokenExtraIdE">tensorrt_llm::runtime::UniqueToken::tokenExtraId (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueToken7tokenIdE">tensorrt_llm::runtime::UniqueToken::tokenId (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16VecTokenExtraIdsE">tensorrt_llm::runtime::VecTokenExtraIds (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15VecUniqueTokensE">tensorrt_llm::runtime::VecUniqueTokens (C++ type)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfigE">tensorrt_llm::runtime::WorldConfig (C++ class)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig17enableAttentionDPEv">tensorrt_llm::runtime::WorldConfig::enableAttentionDP (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getContextParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getContextParallelGroup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getContextParallelismEv">tensorrt_llm::runtime::WorldConfig::getContextParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getContextParallelRankEv">tensorrt_llm::runtime::WorldConfig::getContextParallelRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv">tensorrt_llm::runtime::WorldConfig::getDevice (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getDeviceOfE10SizeType32">tensorrt_llm::runtime::WorldConfig::getDeviceOf (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv">tensorrt_llm::runtime::WorldConfig::getGpusPerGroup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv">tensorrt_llm::runtime::WorldConfig::getGpusPerNode (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv">tensorrt_llm::runtime::WorldConfig::getLastRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig12getLocalRankEv">tensorrt_llm::runtime::WorldConfig::getLocalRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getNodeRankEv">tensorrt_llm::runtime::WorldConfig::getNodeRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig13getNodeRankOfE10SizeType32">tensorrt_llm::runtime::WorldConfig::getNodeRankOf (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv">tensorrt_llm::runtime::WorldConfig::getRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv">tensorrt_llm::runtime::WorldConfig::getSize (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getTensorParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelGroup (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelism (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig17isContextParallelEv">tensorrt_llm::runtime::WorldConfig::isContextParallel (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isFirstContextParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstContextParallelRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig25isFirstTensorParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstTensorParallelRank (C++ function)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank (C++ function)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv">tensorrt_llm::runtime::WorldConfig::isPipelineParallel (C++ function)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv">tensorrt_llm::runtime::WorldConfig::isTensorParallel (C++ function)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode (C++ member)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig19mContextParallelismE">tensorrt_llm::runtime::WorldConfig::mContextParallelism (C++ member)</a>
|
||
</li>
|
||
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE">tensorrt_llm::runtime::WorldConfig::mDeviceIds (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig18mEnableAttentionDPE">tensorrt_llm::runtime::WorldConfig::mEnableAttentionDP (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::mGpusPerNode (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE10SizeType32NSt8optionalI10SizeType32EENSt8optionalI10SizeType32EENSt8optionalI10SizeType32EERKNSt8optionalINSt6vectorI10SizeType32EEEEb">tensorrt_llm::runtime::WorldConfig::mpi (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE">tensorrt_llm::runtime::WorldConfig::mPipelineParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE">tensorrt_llm::runtime::WorldConfig::mRank (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE">tensorrt_llm::runtime::WorldConfig::mTensorParallelism (C++ member)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14validMpiConfigEv">tensorrt_llm::runtime::WorldConfig::validMpiConfig (C++ function)</a>
</li>
<li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt8optionalINSt6vectorI10SizeType32EEEEb">tensorrt_llm::runtime::WorldConfig::WorldConfig (C++ function)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.text">text (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.text_diff">text_diff (tensorrt_llm.llmapi.CompletionOutput attribute)</a>

<ul>
<li><a href="llm-api/reference.html#id4">(tensorrt_llm.llmapi.CompletionOutput property)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.timeout_iters">timeout_iters (tensorrt_llm.llmapi.AttentionDpConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.TimestepEmbedding">TimestepEmbedding (class in tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Timesteps">Timesteps (class in tensorrt_llm.layers.embedding)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.title">title() (tensorrt_llm.llmapi.BatchingType method)</a>

<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.title">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.title">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.title">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.to_dict">to_dict() (tensorrt_llm.llmapi.CalibConfig method)</a>

<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.to_dict">(tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig.to_dict">(tensorrt_llm.models.ChatGLMConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMConfig.to_dict">(tensorrt_llm.models.CogVLMConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxConfig.to_dict">(tensorrt_llm.models.DbrxConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig.to_dict">(tensorrt_llm.models.FalconConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.to_dict">(tensorrt_llm.models.GemmaConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.to_dict">(tensorrt_llm.models.GPTConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig.to_dict">(tensorrt_llm.models.GPTJConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.to_dict">(tensorrt_llm.models.LLaMAConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig.to_dict">(tensorrt_llm.models.MedusaConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_dict">(tensorrt_llm.models.PretrainedConfig method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_json_file">to_json_file() (tensorrt_llm.models.PretrainedConfig method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_layer_quant_config">to_layer_quant_config() (tensorrt_llm.models.PretrainedConfig method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.to_legacy_setting">to_legacy_setting() (tensorrt_llm.plugin.PluginConfig method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.LabelEmbedding.token_drop">token_drop() (tensorrt_llm.layers.embedding.LabelEmbedding method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.token_end">token_end (tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.token_ids">token_ids (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CompletionOutput.token_ids_diff">token_ids_diff (tensorrt_llm.llmapi.CompletionOutput attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id5">(tensorrt_llm.llmapi.CompletionOutput property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.token_range_retention_configs">token_range_retention_configs (tensorrt_llm.llmapi.KvCacheRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.token_start">token_start (tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.tokenizer">tokenizer (tensorrt_llm.llmapi.LLM attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#id1">(tensorrt_llm.llmapi.LLM property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MultimodalEncoder.tokenizer">(tensorrt_llm.llmapi.MultimodalEncoder property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.tokenizer">(tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.tokenizer">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.tokenizer_image_token">tokenizer_image_token() (tensorrt_llm.runtime.MultimodalModelRunner static method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.tokenizer_max_seq_length">tokenizer_max_seq_length (tensorrt_llm.llmapi.CalibConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.tokenizer_mode">tokenizer_mode (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.tokenizer_mode">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.tokenizer_revision">tokenizer_revision (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.tokenizer_revision">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.tokens_per_block">tokens_per_block (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.tokens_per_block">(tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.tokens_per_block">(tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.tokens_per_block">(tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_k">top_k (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_k">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p">top_p (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_decay">top_p_decay (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_decay">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_min">top_p_min (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_min">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_reset_ids">top_p_reset_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_reset_ids">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.topk">topk (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.topk">topk() (in module tensorrt_llm.functional)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.topr">topr (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.torch_compile_config">torch_compile_config (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig">TorchCompileConfig (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.Config">TorchCompileConfig.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs">TorchLlmArgs (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.Config">TorchLlmArgs.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.tp_split_dim">tp_split_dim() (tensorrt_llm.layers.linear.Linear class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.tp_split_dim">(tensorrt_llm.layers.linear.LinearBase class method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.tp_split_dim">(tensorrt_llm.layers.linear.RowLinear class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheRetentionConfig.transfer_mode">transfer_mode (tensorrt_llm.llmapi.KvCacheRetentionConfig property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.translate">translate() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.translate">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.translate">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.translate">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.transpose">transpose() (in module tensorrt_llm.functional)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.transpose">(tensorrt_llm.functional.Tensor method)</a>
|
||
</li>
|
||
</ul></li>
<li>
trtllm-bench command line option

<ul>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-log_level">--log_level</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-m">--model</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-model_path">--model_path</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-w">--workspace</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-m">-m</a>
</li>
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-w">-w</a>
</li>
</ul></li>
<li>
|
||
trtllm-bench-build command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-dataset">--dataset</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_batch_size">--max_batch_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_num_tokens">--max_num_tokens</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-max_seq_len">--max_seq_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-no_weights_loading">--no_weights_loading</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-pp">--pp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-q">--quantization</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-target_input_len">--target_input_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-target_output_len">--target_output_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-tp">--tp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-trust_remote_code">--trust_remote_code</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-pp">-pp</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-q">-q</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-build-tp">-tp</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-bench-latency command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-backend">--backend</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-beam_width">--beam_width</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-concurrency">--concurrency</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-dataset">--dataset</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-engine_dir">--engine_dir</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-ep">--ep</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-extra_llm_api_options">--extra_llm_api_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-iteration_log">--iteration_log</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-kv_cache_free_gpu_mem_fraction">--kv_cache_free_gpu_mem_fraction</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-max_input_len">--max_input_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-max_seq_len">--max_seq_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-medusa_choices">--medusa_choices</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-modality">--modality</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-num_requests">--num_requests</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-pp">--pp</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-report_json">--report_json</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-sampler_options">--sampler_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-tp">--tp</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-latency-warmup">--warmup</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-bench-throughput command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-backend">--backend</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-beam_width">--beam_width</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-cluster_size">--cluster_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-concurrency">--concurrency</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-custom_module_dirs">--custom_module_dirs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-data_device">--data_device</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-dataset">--dataset</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-enable_chunked_context">--disable_chunked_context</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-enable_chunked_context">--enable_chunked_context</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-engine_dir">--engine_dir</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-eos_id">--eos_id</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-ep">--ep</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-extra_llm_api_options">--extra_llm_api_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-image_data_format">--image_data_format</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-iteration_log">--iteration_log</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-kv_cache_free_gpu_mem_fraction">--kv_cache_free_gpu_mem_fraction</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_batch_size">--max_batch_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_input_len">--max_input_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_num_tokens">--max_num_tokens</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-max_seq_len">--max_seq_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-modality">--modality</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-no_skip_tokenizer_init">--no_skip_tokenizer_init</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-num_requests">--num_requests</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-output_json">--output_json</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-pp">--pp</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-report_json">--report_json</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-request_json">--request_json</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-sampler_options">--sampler_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-scheduler_policy">--scheduler_policy</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-streaming">--streaming</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-target_input_len">--target_input_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-target_output_len">--target_output_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-tp">--tp</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-bench.html#cmdoption-trtllm-bench-throughput-warmup">--warmup</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-backend">--backend</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-disable_kv_cache_reuse">--disable_kv_cache_reuse</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-ep_size">--ep_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-extra_llm_api_options">--extra_llm_api_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpus_per_node">--gpus_per_node</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-kv_cache_free_gpu_memory_fraction">--kv_cache_free_gpu_memory_fraction</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-log_level">--log_level</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_batch_size">--max_batch_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_beam_width">--max_beam_width</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_num_tokens">--max_num_tokens</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-max_seq_len">--max_seq_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-model">--model</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-pp_size">--pp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-tokenizer">--tokenizer</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-tp_size">--tp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-trust_remote_code">--trust_remote_code</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-cnn_dailymail command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-rouge_path">--rouge_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-cnn_dailymail-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-gpqa_diamond command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-chat_template_kwargs">--chat_template_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_diamond-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-gpqa_extended command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-chat_template_kwargs">--chat_template_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_extended-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-gpqa_main command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-chat_template_kwargs">--chat_template_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gpqa_main-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-gsm8k command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-chat_template_kwargs">--chat_template_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-fewshot_as_multiturn">--fewshot_as_multiturn</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-gsm8k-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-json_mode_eval command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-json_mode_eval-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-longbench_v2 command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-cot">--cot</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-difficulty">--difficulty</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-domain">--domain</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-length">--length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_len">--max_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-no_context">--no_context</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-output_dir">--output_dir</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-prompts_dir">--prompts_dir</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-rag">--rag</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-start_idx">--start_idx</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-longbench_v2-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-eval-mmlu command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-accuracy_threshold">--accuracy_threshold</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-apply_chat_template">--apply_chat_template</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-chat_template_kwargs">--chat_template_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-check_accuracy">--check_accuracy</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-dataset_path">--dataset_path</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-max_input_length">--max_input_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-max_output_length">--max_output_length</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-num_fewshot">--num_fewshot</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-num_samples">--num_samples</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-random_seed">--random_seed</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmlu-system_prompt">--system_prompt</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
trtllm-eval-mmmu command line option

<ul>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-chat_template_kwargs">--chat_template_kwargs</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-dataset_path">--dataset_path</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-max_input_length">--max_input_length</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-max_output_length">--max_output_length</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-num_samples">--num_samples</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-random_seed">--random_seed</a>
</li>
<li><a href="commands/trtllm-eval.html#cmdoption-trtllm-eval-mmmu-system_prompt">--system_prompt</a>
</li>
</ul></li>
|
||
<li>
|
||
trtllm-serve-disaggregated command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-c">--config_file</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-l">--log_level</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-m">--metadata_server_config_file</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-metrics-log-interval">--metrics-log-interval</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-r">--request_timeout</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-t">--server_start_timeout</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-c">-c</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-l">-l</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-m">-m</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-r">-r</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated-t">-t</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
trtllm-serve-disaggregated_mpi_worker command line option

<ul>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-c">--config_file</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-log_level">--log_level</a>
</li>
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-disaggregated_mpi_worker-c">-c</a>
</li>
</ul></li>
|
||
<li>
|
||
trtllm-serve-mm_embedding_serve command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-extra_encoder_options">--extra_encoder_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-gpus_per_node">--gpus_per_node</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-host">--host</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-log_level">--log_level</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-max_batch_size">--max_batch_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-max_num_tokens">--max_num_tokens</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-metadata_server_config_file">--metadata_server_config_file</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-port">--port</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-trust_remote_code">--trust_remote_code</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-mm_embedding_serve-arg-MODEL">MODEL</a>
|
||
</li>
|
||
</ul></li>
|
||
<li>
|
||
trtllm-serve-serve command line option
|
||
|
||
<ul>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-backend">--backend</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-cluster_size">--cluster_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-disagg_cluster_uri">--disagg_cluster_uri</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-enable_chunked_prefill">--enable_chunked_prefill</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-ep_size">--ep_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-extra_llm_api_options">--extra_llm_api_options</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-fail_fast_on_attention_window_too_large">--fail_fast_on_attention_window_too_large</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-gpus_per_node">--gpus_per_node</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-host">--host</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-kv_cache_free_gpu_memory_fraction">--kv_cache_free_gpu_memory_fraction</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-log_level">--log_level</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_batch_size">--max_batch_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_beam_width">--max_beam_width</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_num_tokens">--max_num_tokens</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-max_seq_len">--max_seq_len</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-media_io_kwargs">--media_io_kwargs</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-metadata_server_config_file">--metadata_server_config_file</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-num_postprocess_workers">--num_postprocess_workers</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-otlp_traces_endpoint">--otlp_traces_endpoint</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-port">--port</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-pp_size">--pp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-reasoning_parser">--reasoning_parser</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-server_role">--server_role</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tokenizer">--tokenizer</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tool_parser">--tool_parser</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-tp_size">--tp_size</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-trust_remote_code">--trust_remote_code</a>
|
||
</li>
|
||
<li><a href="commands/trtllm-serve/trtllm-serve.html#cmdoption-trtllm-serve-serve-arg-MODEL">MODEL</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.trtllm_modules_to_hf_modules">trtllm_modules_to_hf_modules (tensorrt_llm.runtime.ModelConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs">TrtLlmArgs (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.Config">TrtLlmArgs.Config (class in tensorrt_llm.llmapi)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.truncate_prompt_tokens">truncate_prompt_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.trust_remote_code">trust_remote_code (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.trust_remote_code">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.TWOSHOT">TWOSHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
</ul></td>
</tr></table>

<h2 id="U">U</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.UB">UB (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unary">unary() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unbind">unbind() (in module tensorrt_llm.functional)</a>

<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.unbind">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SD3Transformer2DModel.unfuse_qkv_projections">unfuse_qkv_projections() (tensorrt_llm.models.SD3Transformer2DModel method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.unpatchify">unpatchify() (tensorrt_llm.models.DiT method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unsqueeze">unsqueeze() (in module tensorrt_llm.functional)</a>

<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.unsqueeze">(tensorrt_llm.functional.Tensor method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.update">update() (tensorrt_llm.runtime.SamplingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.update_forward_refs">update_forward_refs() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.AutoDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.update_forward_refs">(tensorrt_llm.llmapi.BuildConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.update_forward_refs">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.update_forward_refs">(tensorrt_llm.llmapi.CalibConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.update_forward_refs">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.update_forward_refs">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.DraftTargetDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.update_forward_refs">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.EagleDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.update_forward_refs">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.update_forward_refs">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.MedusaDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.update_forward_refs">(tensorrt_llm.llmapi.MoeConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.MTPDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.NGramDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.update_forward_refs">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.update_forward_refs">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.update_forward_refs">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.update_forward_refs">(tensorrt_llm.llmapi.UserProvidedDecodingConfig class method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.update_kv_cache_type">update_kv_cache_type() (tensorrt_llm.llmapi.BuildConfig method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.update_output_ids_by_offset">update_output_ids_by_offset() (tensorrt_llm.runtime.GenerationSession method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.update_strategy">update_strategy() (tensorrt_llm.functional.AllReduceParams method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.upper">upper() (tensorrt_llm.llmapi.BatchingType method)</a>
|
||
|
||
<ul>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.upper">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.upper">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.upper">(tensorrt_llm.llmapi.QuantAlgo method)</a>
|
||
</li>
|
||
</ul></li>
|
||
</ul></td>
|
||
<td style="width: 33%; vertical-align: top;"><ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.use_beam_hyps">use_beam_hyps (tensorrt_llm.runtime.SamplingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.use_beam_search">use_beam_search (tensorrt_llm.llmapi.SamplingParams attribute)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.use_dynamic_tree">use_dynamic_tree (tensorrt_llm.llmapi.EagleDecodingConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.use_fp8_context_fmha">use_fp8_context_fmha (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.use_fused_mlp">use_fused_mlp (tensorrt_llm.plugin.PluginConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_gemm_allreduce_plugin">use_gemm_allreduce_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin">use_gpt_attention_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_kv_cache">use_kv_cache (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.use_lora">use_lora() (tensorrt_llm.models.DecoderModel method)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.use_lora">(tensorrt_llm.models.EncoderModel method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.use_lora">(tensorrt_llm.models.GemmaForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.use_lora">(tensorrt_llm.models.GPTForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.use_lora">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAForCausalLM.use_lora">(tensorrt_llm.models.MLLaMAForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.use_lora">(tensorrt_llm.models.Phi3ForCausalLM method)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.use_lora">(tensorrt_llm.models.PhiForCausalLM method)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_lora_plugin">use_lora_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
|
||
<ul>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.use_lora_plugin">(tensorrt_llm.runtime.ModelRunner property)</a>
|
||
</li>
|
||
</ul></li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.use_low_precision_moe_combine">use_low_precision_moe_combine (tensorrt_llm.llmapi.MoeConfig attribute)</a>
|
||
</li>
|
||
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_mamba_conv1d_plugin">use_mamba_conv1d_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
|
||
</li>
|
||
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.use_meta_recipe">use_meta_recipe (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_mrope">use_mrope (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.use_mtp_vanilla">use_mtp_vanilla (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.use_paged_context_fmha">use_paged_context_fmha (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.use_prompt_tuning">use_prompt_tuning() (tensorrt_llm.models.EncoderModel method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_refit">use_refit (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.use_relaxed_acceptance_for_thinking">use_relaxed_acceptance_for_thinking (tensorrt_llm.llmapi.MTPDecodingConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_strip_plan">use_strip_plan (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.use_uvm">use_uvm (tensorrt_llm.llmapi.KvCacheConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.user_buffer">user_buffer (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.USER_PROVIDED">USER_PROVIDED (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig">UserProvidedDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.Config">UserProvidedDecodingConfig.Config (class in tensorrt_llm.llmapi)</a>
</li>
</ul></td>
</tr></table>

<h2 id="V">V</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AttentionDpConfig.validate">validate() (tensorrt_llm.llmapi.AttentionDpConfig class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.AutoDecodingConfig.validate">(tensorrt_llm.llmapi.AutoDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.validate">(tensorrt_llm.llmapi.BuildConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CacheTransceiverConfig.validate">(tensorrt_llm.llmapi.CacheTransceiverConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.validate">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.validate">(tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.validate">(tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DraftTargetDecodingConfig.validate">(tensorrt_llm.llmapi.DraftTargetDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.DynamicBatchConfig.validate">(tensorrt_llm.llmapi.DynamicBatchConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.EagleDecodingConfig.validate">(tensorrt_llm.llmapi.EagleDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.validate">(tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.validate">(tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.validate">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.validate">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig.validate">(tensorrt_llm.llmapi.MoeConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.validate">(tensorrt_llm.llmapi.MTPDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NGramDecodingConfig.validate">(tensorrt_llm.llmapi.NGramDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.validate">(tensorrt_llm.llmapi.RocketSparseAttentionConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.validate">(tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.validate">(tensorrt_llm.llmapi.SchedulerConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.validate">(tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.UserProvidedDecodingConfig.validate">(tensorrt_llm.llmapi.UserProvidedDecodingConfig method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.validate">(tensorrt_llm.plugin.PluginConfig method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_and_init_tokenizer">validate_and_init_tokenizer() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_and_init_tokenizer">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_attention_dp_config">validate_attention_dp_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_batch_wait_max_tokens_ratio">validate_batch_wait_max_tokens_ratio() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_batch_wait_timeout_iters">validate_batch_wait_timeout_iters() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_batch_wait_timeout_ms">validate_batch_wait_timeout_ms() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_build_config_remaining">validate_build_config_remaining() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_build_config_remaining">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_build_config_with_runtime_params">validate_build_config_with_runtime_params() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_build_config_with_runtime_params">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.validate_capture_num_tokens">validate_capture_num_tokens() (tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_checkpoint_format">validate_checkpoint_format() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_cuda_graph_config">validate_cuda_graph_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CudaGraphConfig.validate_cuda_graph_max_batch_size">validate_cuda_graph_max_batch_size() (tensorrt_llm.llmapi.CudaGraphConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_dtype">validate_dtype() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_dtype">(tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.validate_dtype_not_auto">validate_dtype_not_auto() (tensorrt_llm.plugin.PluginConfig class method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_enable_build_cache">validate_enable_build_cache() (tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.validate_free_gpu_memory_fraction">validate_free_gpu_memory_fraction() (tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_gpus_per_node">validate_gpus_per_node() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_gpus_per_node">(tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_kv_cache_dtype">validate_kv_cache_dtype() (tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_load_balancer">validate_load_balancer() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_lora_config_consistency">validate_lora_config_consistency() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_lora_config_consistency">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.validate_max_attention_window">validate_max_attention_window() (tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.validate_max_gpu_total_bytes">validate_max_gpu_total_bytes() (tensorrt_llm.llmapi.KvCacheConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_model">validate_model() (tensorrt_llm.llmapi.TorchLlmArgs class method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_model">(tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_model_format_misc">validate_model_format_misc() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_model_format_misc">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_parallel_config">validate_parallel_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_parallel_config">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_peft_cache_config">validate_peft_cache_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_peft_cache_config">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.validate_positive_values">validate_positive_values() (tensorrt_llm.llmapi.LookaheadDecodingConfig class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_quant_config">validate_quant_config() (tensorrt_llm.llmapi.TrtLlmArgs class method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_ray_worker_extension_cls">validate_ray_worker_extension_cls() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_runtime_args">validate_runtime_args() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_runtime_args">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_speculative_config">validate_speculative_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.validate_speculative_config">(tensorrt_llm.llmapi.TrtLlmArgs method)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_stream_interval">validate_stream_interval() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_torch_compile_config">validate_torch_compile_config() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchCompileConfig.validate_torch_compile_max_num_streams">validate_torch_compile_max_num_streams() (tensorrt_llm.llmapi.TorchCompileConfig class method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.VERBATIM">VERBATIM (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.video_preprocess">video_preprocess() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.view">view() (in module tensorrt_llm.functional)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.view">(tensorrt_llm.functional.Tensor method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.view">(tensorrt_llm.runtime.TensorInfo method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.visual_engine_dir">visual_engine_dir (tensorrt_llm.runtime.MultimodalModelRunner property)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.visualize_network">visualize_network (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.vocab_size">vocab_size (tensorrt_llm.runtime.GenerationSession property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.vocab_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size_padded">vocab_size_padded (tensorrt_llm.runtime.ModelRunner property)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size_padded">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="W">W</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16">W4A16 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16_AWQ">W4A16_AWQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16_GPTQ">W4A16_GPTQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16_MXFP4">W4A16_MXFP4 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_AWQ">W4A8_AWQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_MXFP4_FP8">W4A8_MXFP4_FP8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_MXFP4_MXFP8">W4A8_MXFP4_MXFP8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_NVFP4_FP8">W4A8_NVFP4_FP8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_CHANNEL">W4A8_QSERVE_PER_CHANNEL (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_GROUP">W4A8_QSERVE_PER_GROUP (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A16">W8A16 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A16_GPTQ">W8A16_GPTQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL">W8A8_SQ_PER_CHANNEL (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN">W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN">W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN">W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PLUGIN">W8A8_SQ_PER_TENSOR_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.warn_on_unstable_feature_usage">warn_on_unstable_feature_usage() (tensorrt_llm.llmapi.TorchLlmArgs method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention.weight_loader">weight_loader() (tensorrt_llm.layers.attention.DeepseekV2Attention method)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.weight_loader">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.weight_loader">(tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
</ul></li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.weight_only_groupwise_quant_matmul_plugin">weight_only_groupwise_quant_matmul_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.weight_only_quant_matmul_plugin">weight_only_quant_matmul_plugin (tensorrt_llm.plugin.PluginConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.weight_sparsity">weight_sparsity (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.weight_streaming">weight_streaming (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.where">where() (in module tensorrt_llm.functional)</a>
</li>
<li><a href="legacy/python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder">WhisperEncoder (class in tensorrt_llm.models)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.window_size">window_size (tensorrt_llm.llmapi.RocketSparseAttentionConfig attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError.with_traceback">with_traceback() (tensorrt_llm.llmapi.RequestError method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.workspace">workspace (tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.wrapped_property">wrapped_property (tensorrt_llm.llmapi.TorchLlmArgs attribute)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.TrtLlmArgs.wrapped_property">(tensorrt_llm.llmapi.TrtLlmArgs attribute)</a>
</li>
</ul></li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.write_interval">write_interval (tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig attribute)</a>
</li>
</ul></td>
</tr></table>

<h2 id="Y">Y</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.yarn">yarn (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
<ul>
<li><a href="legacy/python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.yarn">(tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
</ul></li>
</ul></td>
</tr></table>

<h2 id="Z">Z</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BatchingType.zfill">zfill() (tensorrt_llm.llmapi.BatchingType method)</a>
<ul>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.zfill">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.ContextChunkingPolicy.zfill">(tensorrt_llm.llmapi.ContextChunkingPolicy method)</a>
</li>
<li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.zfill">(tensorrt_llm.llmapi.QuantAlgo method)</a>
</li>
</ul></li>
</ul></td>
</tr></table>
</article>
<footer class="prev-next-footer d-print-none">

<div class="prev-next-area">
</div>
</footer>

</div>

<div class="bd-sidebar-secondary"></div>

</div>
<footer class="bd-footer-content">

</footer>

</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">

<div class="footer-items__start">

<div class="footer-item">
<a class="footer-brand logo" href="https://www.nvidia.com">
<img src="_static/nvidia-logo-horiz-rgb-1c-blk-for-screen.svg" class="logo__image only-light" alt="NVIDIA"/>
<img src="_static/nvidia-logo-horiz-rgb-1c-wht-for-screen.svg" class="logo__image only-dark" alt="NVIDIA"/>
</a></div>

<div class="footer-item">

<div class="footer-links">
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/">Privacy Policy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/">Terms of Service</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/">Accessibility</a>
|
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/">Corporate Policies</a>
|
<a class="external" href="https://www.nvidia.com/en-us/product-security/">Product Security</a>
|
<a class="external" href="https://www.nvidia.com/en-us/contact/">Contact</a>
</div>
</div>

<div class="footer-item">

<p class="copyright">
Copyright © 2025, NVIDIA.
<br/>

</p>
</div>

<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
</div></div>

</div>

</div>

</footer>
</body>
</html> |