<!-- TensorRT-LLMs/genindex.html -->



<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
  <!-- Document metadata: encoding, viewport and page title. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Index &mdash; tensorrt_llm  documentation</title>

  <!-- Stylesheets shipped under _static/. -->
  <link rel="stylesheet" href="_static/pygments.css?v=b86133f3">
  <link rel="stylesheet" href="_static/css/theme.css?v=e59714d7">
  <link rel="stylesheet" href="_static/copybutton.css?v=76b2166b">

  <!-- Scripts carry no async/defer, so they load and run in this exact order. -->
  <script src="_static/jquery.js?v=5d32c60e"></script>
  <script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="_static/documentation_options.js?v=5929fcd5"></script>
  <script src="_static/doctools.js?v=9bcbadda"></script>
  <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="_static/clipboard.min.js?v=a7894cd8"></script>
  <script src="_static/copybutton.js?v=65e89d2a"></script>
  <script src="_static/js/theme.js"></script>

  <!-- Relationship links: this page is the index; search lives in search.html. -->
  <link rel="index" title="Index" href="#">
  <link rel="search" title="Search" href="search.html">
</head>

<body class="wy-body-for-nav">
  <div class="wy-grid-for-nav">
    <!--
      Side navigation: project home link, search form, and the site-wide table
      of contents grouped under caption headings.
      Each caption uses role="heading", which requires an explicit aria-level;
      level 2 is the value user agents assume when the attribute is missing.
    -->
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >


          <a href="index.html" class="icon icon-home">
            tensorrt_llm
          </a>
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <p class="caption" role="heading" aria-level="2"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="quick-start-guide.html">Quick Start Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="key-features.html">Key Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release Notes</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Installation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="installation/linux.html">Installing on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/build-from-source-linux.html">Building from Source Code on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/windows.html">Installing on Windows</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/build-from-source-windows.html">Building from Source Code on Windows</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/grace-hopper.html">Installing on Grace Hopper</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">LLM API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="llm-api/index.html">API Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="llm-api/reference.html">API Reference</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">LLM API Examples</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="llm-api-examples/index.html">LLM Examples Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="llm-api-examples/customization.html">Common Customizations</a></li>
<li class="toctree-l1"><a class="reference internal" href="llm-api-examples/llm_api_examples.html">Examples</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Model Definition API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.layers.html">Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.functional.html">Functionals</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.models.html">Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.plugin.html">Plugin</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.runtime.html">Runtime</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">C++ API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="_cpp_gen/executor.html">Executor</a></li>
<li class="toctree-l1"><a class="reference internal" href="_cpp_gen/runtime.html">Runtime</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Command-Line Reference</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="commands/trtllm-build.html">trtllm-build</a></li>
<li class="toctree-l1"><a class="reference internal" href="commands/trtllm-serve.html">trtllm-serve</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Architecture</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="architecture/overview.html">TensorRT-LLM Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html">Model Definition</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#compilation">Compilation</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#runtime">Runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#multi-gpu-and-multi-node-support">Multi-GPU and Multi-Node Support</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/checkpoint.html">TensorRT-LLM Checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/workflow.html">TensorRT-LLM Build Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/add-model.html">Adding a Model</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Advanced</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="advanced/gpt-attention.html">Multi-Head, Multi-Query, and Group-Query Attention</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/gpt-runtime.html">C++ GPT Runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/executor.html">Executor API</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/graph-rewriting.html">Graph Rewriting Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/inference-request.html">Inference Request</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/inference-request.html#responses">Responses</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/lora.html">Run gpt-2b + LoRA using GptManager / cpp runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/expert-parallelism.html">Expert Parallelism in TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/kv-cache-reuse.html">KV cache reuse</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/speculative-decoding.html">Speculative Sampling</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/disaggregated-service.html">Disaggregated-Service (experimental)</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Performance</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-benchmarking.html">Benchmarking</a></li>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-best-practices.html">Best Practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-analysis.html">Performance Analysis</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Reference</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="reference/troubleshooting.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/support-matrix.html">Support Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/precision.html">Numerical Precision</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/memory.html">Memory Usage of TensorRT-LLM</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Blogs</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/quantization-in-TRT-LLM.html">Speed up inference with SOTA quantization techniques in TRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/XQA-kernel.html">New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">tensorrt_llm</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <!-- Breadcrumb trail: a home link followed by the current page ("Index"). -->
          <div role="navigation" aria-label="Page navigation">
            <ul class="wy-breadcrumbs">
              <li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
              <li class="breadcrumb-item active">Index</li>
              <li class="wy-breadcrumbs-aside">
              </li>
            </ul>
            <hr>
          </div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">


<!-- Page heading, then a jump bar of fragment links to the index sections. -->
<h1 id="index">Index</h1>

<div class="genindex-jumpbox">
  <a href="#Symbols"><strong>Symbols</strong></a> |
  <a href="#_"><strong>_</strong></a> |
  <a href="#A"><strong>A</strong></a> |
  <a href="#B"><strong>B</strong></a> |
  <a href="#C"><strong>C</strong></a> |
  <a href="#D"><strong>D</strong></a> |
  <a href="#E"><strong>E</strong></a> |
  <a href="#F"><strong>F</strong></a> |
  <a href="#G"><strong>G</strong></a> |
  <a href="#H"><strong>H</strong></a> |
  <a href="#I"><strong>I</strong></a> |
  <a href="#J"><strong>J</strong></a> |
  <a href="#K"><strong>K</strong></a> |
  <a href="#L"><strong>L</strong></a> |
  <a href="#M"><strong>M</strong></a> |
  <a href="#N"><strong>N</strong></a> |
  <a href="#O"><strong>O</strong></a> |
  <a href="#P"><strong>P</strong></a> |
  <a href="#Q"><strong>Q</strong></a> |
  <a href="#R"><strong>R</strong></a> |
  <a href="#S"><strong>S</strong></a> |
  <a href="#T"><strong>T</strong></a> |
  <a href="#U"><strong>U</strong></a> |
  <a href="#V"><strong>V</strong></a> |
  <a href="#W"><strong>W</strong></a> |
  <a href="#Y"><strong>Y</strong></a>
</div>
<!--
  "Symbols" index section: trtllm-serve command line options, split across two
  columns. The table only provides the column layout, so it is marked
  role="presentation" to keep assistive technology from announcing it as data.
-->
<h2 id="Symbols">Symbols</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li>
    --host

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-host">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --kv_cache_free_gpu_memory_fraction

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-kv_cache_free_gpu_memory_fraction">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --max_batch_size

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_batch_size">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --max_beam_width

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_beam_width">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --max_num_tokens

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_num_tokens">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --max_seq_len

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_seq_len">trtllm-serve command line option</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li>
    --port

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-port">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --pp_size

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-pp_size">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --tokenizer

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-tokenizer">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --tp_size

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-tp_size">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li>
    --trust_remote_code

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-trust_remote_code">trtllm-serve command line option</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<!--
  "_" index section: the __init__() entries, in a single column.
  The table only provides layout, so it is marked role="presentation" to keep
  assistive technology from announcing it as data.
-->
<h2 id="_">_</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.__init__">__init__() (tensorrt_llm.llmapi.BuildCacheConfig method)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.__init__">(tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.__init__">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__">(tensorrt_llm.llmapi.CapacitySchedulerPolicy method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.__init__">(tensorrt_llm.llmapi.GuidedDecodingParams method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.__init__">(tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.__init__">(tensorrt_llm.llmapi.LLM method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__">(tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.__init__">(tensorrt_llm.llmapi.MedusaDecodingConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.__init__">(tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.__init__">(tensorrt_llm.llmapi.RequestOutput method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.__init__">(tensorrt_llm.llmapi.SamplingParams method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.__init__">(tensorrt_llm.llmapi.SchedulerConfig method)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<!--
  "A" index section, split across two columns.
  The table only provides the column layout, so it is marked
  role="presentation" to keep assistive technology from announcing it as data.
-->
<h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.abs">abs() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.abs">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.activation">activation() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.add">add() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional.add_input">add_input() (tensorrt_llm.functional.Conditional method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional.add_output">add_output() (tensorrt_llm.functional.Conditional method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.add_sequence">add_sequence() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.add_special_tokens">add_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi">alibi (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale">alibi_with_scale (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allgather">allgather() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allreduce">allreduce() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig">AllReduceConfig (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp">AllReduceFusionOp (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams">AllReduceParams (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy">AllReduceStrategy (class in tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling">apply_llama3_scaling() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb">apply_rotary_pos_emb() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm">apply_rotary_pos_emb_chatglm() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm">apply_rotary_pos_emb_cogvlm() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.arange">arange() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.argmax">argmax() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.assert_valid_quant_algo">assert_valid_quant_algo() (tensorrt_llm.models.GemmaForCausalLM class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.assertion">assertion() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention">Attention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionMaskParams">AttentionMaskParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType">AttentionMaskType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams">AttentionParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.AUTO">AUTO (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.auto_parallel_config">auto_parallel_config (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.avg_pool2d">avg_pool2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d">AvgPool2d (class in tensorrt_llm.layers.pooling)</a>
</li>
  </ul></td>
</tr></table>

<!--
  "B" index section, split across two columns.
  The table only provides the column layout, so it is marked
  role="presentation" to keep assistive technology from announcing it as data.
-->
<h2 id="B">B</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.bad">bad (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.bad_token_ids">bad_token_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.bad_words_list">bad_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM">BaichuanForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.batch_size">batch_size (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate">beam_search_diversity_rate (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.beam_width">beam_width (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.bert_attention">bert_attention() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention">BertAttention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering">BertForQuestionAnswering (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification">BertForSequenceClassification (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel">BertModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.best_of">best_of (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectional">bidirectional (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectionalglm">bidirectionalglm (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.blocksparse">blocksparse (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BlockSparseAttnParams">BlockSparseAttnParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomForCausalLM">BloomForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel">BloomModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.broadcast_helper">broadcast_helper() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.buffer_allocated">buffer_allocated (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig">BuildCacheConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig">BuildConfig (class in tensorrt_llm.llmapi)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="C">C</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.cache_root">cache_root (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>

      <ul>
        <li><a href="llm-api/reference.html#id0">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource">calculate_speculative_resource() (tensorrt_llm.llmapi.LookaheadDecodingConfig method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_batch_size">calib_batch_size (tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_batches">calib_batches (tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_dataset">calib_dataset (tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length">calib_max_seq_length (tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig">CalibConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy">capacity_scheduler_policy (tensorrt_llm.llmapi.SchedulerConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy">CapacitySchedulerPolicy (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast">Cast (class in tensorrt_llm.layers.cast)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cast">cast() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.cast">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.categorical_sample">categorical_sample() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.causal">causal (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.chatglm">chatglm (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig">ChatGLMConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM">ChatGLMForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ChatGLMGenerationSession">ChatGLMGenerationSession (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel">ChatGLMModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.check_config">check_config() (tensorrt_llm.models.DecoderModel method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.check_config">(tensorrt_llm.models.DiT method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.check_config">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.check_config">(tensorrt_llm.models.FalconForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM.check_config">(tensorrt_llm.models.MPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM.check_config">(tensorrt_llm.models.OPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.check_config">(tensorrt_llm.models.PhiForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.check_config">(tensorrt_llm.models.PretrainedModel method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.choices">choices() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.chunk">chunk() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.clamp_val">clamp_val (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.clip">clip() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.CogVLMAttention">CogVLMAttention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMConfig">CogVLMConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM">CogVLMForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM">CohereForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.collect_and_bias">collect_and_bias() (tensorrt_llm.layers.linear.Linear method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.collect_and_bias">(tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.collect_and_bias">(tensorrt_llm.layers.linear.RowLinear method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.ColumnLinear">ColumnLinear (in module tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.compute_relative_bias">compute_relative_bias() (in module tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.concat">concat() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Conditional">Conditional (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.config_class">config_class (tensorrt_llm.models.BaichuanForCausalLM attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.config_class">(tensorrt_llm.models.ChatGLMForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.config_class">(tensorrt_llm.models.CogVLMForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM.config_class">(tensorrt_llm.models.CohereForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxForCausalLM.config_class">(tensorrt_llm.models.DbrxForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM.config_class">(tensorrt_llm.models.DeepseekForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.config_class">(tensorrt_llm.models.EagleForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.config_class">(tensorrt_llm.models.FalconForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.config_class">(tensorrt_llm.models.GemmaForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.config_class">(tensorrt_llm.models.GPTForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM.config_class">(tensorrt_llm.models.GPTJForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.config_class">(tensorrt_llm.models.LLaMAForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.config_class">(tensorrt_llm.models.MambaForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.config_class">(tensorrt_llm.models.MedusaForCausalLm attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.config_class">(tensorrt_llm.models.MLLaMAModel attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.config_class">(tensorrt_llm.models.Phi3ForCausalLM attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.config_class">(tensorrt_llm.models.PhiForCausalLM attribute)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant">constant() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant_to_tensor_">constant_to_tensor_() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constants_to_tensors_">constants_to_tensors_() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.context">context (tensorrt_llm.runtime.Session property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy">context_chunking_policy (tensorrt_llm.llmapi.SchedulerConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.context_mem_size">context_mem_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.context_mem_size">(tensorrt_llm.runtime.Session property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d">Conv1d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv1d">conv1d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d">Conv2d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv2d">conv2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.conv_kernel">conv_kernel (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.conv_kernel">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv_transpose2d">conv_transpose2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d">ConvTranspose2d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cos">cos() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cp_split_plugin">cp_split_plugin() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.create_allreduce_plugin">create_allreduce_plugin() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.create_attention_const_params">create_attention_const_params() (tensorrt_llm.layers.attention.Attention static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight">create_fake_weight() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.create_runtime_defaults">create_runtime_defaults() (tensorrt_llm.models.PretrainedConfig static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions">create_sinusoidal_positions() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin">create_sinusoidal_positions_for_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin">create_sinusoidal_positions_for_cogvlm_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_deepseek_attention_plugin">create_sinusoidal_positions_for_deepseek_attention_plugin() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope">create_sinusoidal_positions_long_rope() (tensorrt_llm.functional.RopeEmbeddingUtils method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cross_attention">cross_attention (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.cross_attention">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction">cross_kv_cache_fraction (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_graph_mode">cuda_graph_mode (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_stream_guard">cuda_stream_guard() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cuda_stream_sync">cuda_stream_sync() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cumsum">cumsum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.custom_mask">custom_mask (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "D". The table below is layout-only: it places the two
     lists of index entries side by side and carries no header cells or
     caption. role="presentation" removes the table/row/cell semantics for
     assistive technology; the nested lists and links keep their own
     semantics. Element names and classes are unchanged. -->
<h2 id="D">D</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxConfig">DbrxConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxForCausalLM">DbrxForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_mode">debug_mode (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save">debug_tensors_to_save (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode">decode() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_batch">decode_batch() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_regular">decode_regular() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_stream">decode_stream() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.decode_words_list">decode_words_list() (in module tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel">DecoderModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM">DeepseekForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention">DeepseekV2Attention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekV2ForCausalLM">DeepseekV2ForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.default_plugin_config">default_plugin_config() (tensorrt_llm.models.CogVLMForCausalLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.detokenize">detokenize (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.device">device (tensorrt_llm.llmapi.CalibConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.device">(tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.DimRange">DimRange (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType.disable">disable (tensorrt_llm.functional.SideStreamIDType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT">DiT (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.div">div() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.DRAFT_TOKENS_EXTERNAL">DRAFT_TOKENS_EXTERNAL (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.dry_run">dry_run (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.dtype">dtype (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dtype">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.dtype">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.dtype">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.dtype">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.dtype">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dump_debug_buffers">dump_debug_buffers() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.dynamic">dynamic (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config">dynamic_batch_config (tensorrt_llm.llmapi.SchedulerConfig property)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "E". The table below is layout-only: it places the two
     lists of index entries side by side and carries no header cells or
     caption. role="presentation" removes the table/row/cell semantics for
     assistive technology; the nested lists and links keep their own
     semantics. Element names and classes are unchanged. -->
<h2 id="E">E</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.EAGLE">EAGLE (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM">EagleForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.early_stop_criteria">early_stop_criteria() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.early_stopping">early_stopping (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.early_stopping">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.einsum">einsum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.elementwise_binary">elementwise_binary() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding">Embedding (class in tensorrt_llm.layers.embedding)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.embedding">embedding() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.embedding_bias">embedding_bias (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse">enable_block_reuse (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.enable_debug_output">enable_debug_output (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner">EncDecModelRunner (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.encoder_run">encoder_run() (tensorrt_llm.runtime.EncDecModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel">EncoderModel (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.end_id">end_id (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.end_id">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.engine">engine (tensorrt_llm.runtime.Session property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.engine_inspector">engine_inspector (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.eq">eq() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size">event_buffer_max_size (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output">exclude_input_from_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.exclude_modules">exclude_modules (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.exp">exp() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand">expand() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims">expand_dims() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims_like">expand_dims_like() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_mask">expand_mask() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.EXPLICIT_DRAFT_TOKENS">EXPLICIT_DRAFT_TOKENS (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.external_draft_tokens_config">external_draft_tokens_config (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="F">F</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig">FalconConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM">FalconForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel">FalconModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate">fc_gate() (tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.fc_gate_lora">fc_gate_lora() (in module tensorrt_llm.layers.mlp)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate_plugin">fc_gate_plugin() (tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_long_rope">fill_attention_const_params_for_long_rope() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_rope">fill_attention_const_params_for_rope() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.fill_attention_params">fill_attention_params() (tensorrt_llm.layers.attention.Attention static method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.fill_empty_fields_from_runtime_defaults">fill_empty_fields_from_runtime_defaults() (tensorrt_llm.llmapi.KvCacheConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list">fill_none_tensor_list() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.filter_medusa_logits">filter_medusa_logits() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.finalize_decoder">finalize_decoder() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.find_best_medusa_path">find_best_medusa_path() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.first_layer">first_layer (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.flatten">flatten() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.flatten">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.flip">flip() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.floordiv">floordiv() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#c.FMT_DIM">FMT_DIM (C macro)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.for_each_rank">for_each_rank() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.force_num_profiles">force_num_profiles (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish.forward">forward() (tensorrt_llm.layers.activation.Mish method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.forward">(tensorrt_llm.layers.attention.Attention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention.forward">(tensorrt_llm.layers.attention.BertAttention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.CogVLMAttention.forward">(tensorrt_llm.layers.attention.CogVLMAttention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention.forward">(tensorrt_llm.layers.attention.DeepseekV2Attention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast.forward">(tensorrt_llm.layers.cast.Cast method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d.forward">(tensorrt_llm.layers.conv.Conv1d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d.forward">(tensorrt_llm.layers.conv.Conv2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d.forward">(tensorrt_llm.layers.conv.ConvTranspose2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.forward">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward">(tensorrt_llm.layers.embedding.PromptTuningEmbedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.forward">(tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.forward">(tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP.forward">(tensorrt_llm.layers.mlp.GatedMLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP.forward">(tensorrt_llm.layers.mlp.MLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm.forward">(tensorrt_llm.layers.normalization.GroupNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm.forward">(tensorrt_llm.layers.normalization.LayerNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm.forward">(tensorrt_llm.layers.normalization.RmsNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d.forward">(tensorrt_llm.layers.pooling.AvgPool2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering.forward">(tensorrt_llm.models.BertForQuestionAnswering method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification.forward">(tensorrt_llm.models.BertForSequenceClassification method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel.forward">(tensorrt_llm.models.BertModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel.forward">(tensorrt_llm.models.BloomModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel.forward">(tensorrt_llm.models.ChatGLMModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.forward">(tensorrt_llm.models.DecoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward">(tensorrt_llm.models.DiT method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.forward">(tensorrt_llm.models.EagleForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.forward">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel.forward">(tensorrt_llm.models.FalconModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel.forward">(tensorrt_llm.models.GPTJModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel.forward">(tensorrt_llm.models.GPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel.forward">(tensorrt_llm.models.GPTNeoXModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel.forward">(tensorrt_llm.models.LLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.forward">(tensorrt_llm.models.MambaForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.forward">(tensorrt_llm.models.MLLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel.forward">(tensorrt_llm.models.MPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel.forward">(tensorrt_llm.models.OPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3Model.forward">(tensorrt_llm.models.Phi3Model method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel.forward">(tensorrt_llm.models.PhiModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.forward">(tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ReDrafterForCausalLM.forward">(tensorrt_llm.models.ReDrafterForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.forward">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward_with_cfg">forward_with_cfg() (tensorrt_llm.models.DiT method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.forward_without_cfg">forward_without_cfg() (tensorrt_llm.models.DiT method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.FP8">FP8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN">FP8_PER_CHANNEL_PER_TOKEN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction">free_gpu_memory_fraction (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.frequency_penalty">frequency_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.frequency_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.from_arguments">from_arguments() (tensorrt_llm.models.SpeculativeDecodingMode static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_checkpoint">from_checkpoint() (tensorrt_llm.models.PretrainedConfig class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_checkpoint">(tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_config">from_config() (tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.from_dict">from_dict() (tensorrt_llm.llmapi.BuildConfig class method)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.from_dict">(tensorrt_llm.llmapi.CalibConfig class method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.from_dict">(tensorrt_llm.llmapi.QuantConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_dict">(tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_dir">from_dir() (tensorrt_llm.runtime.ModelRunner class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.from_dir">(tensorrt_llm.runtime.ModelRunnerCpp class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.from_engine">from_engine() (tensorrt_llm.runtime.EncDecModelRunner class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_engine">(tensorrt_llm.runtime.ModelRunner class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_engine">(tensorrt_llm.runtime.Session static method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.from_hugging_face">from_hugging_face() (tensorrt_llm.models.BaichuanForCausalLM class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig.from_hugging_face">(tensorrt_llm.models.ChatGLMConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.from_hugging_face">(tensorrt_llm.models.ChatGLMForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.from_hugging_face">(tensorrt_llm.models.CogVLMForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CohereForCausalLM.from_hugging_face">(tensorrt_llm.models.CohereForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekForCausalLM.from_hugging_face">(tensorrt_llm.models.DeepseekForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DeepseekV2ForCausalLM.from_hugging_face">(tensorrt_llm.models.DeepseekV2ForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig.from_hugging_face">(tensorrt_llm.models.FalconConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.from_hugging_face">(tensorrt_llm.models.FalconForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.from_hugging_face">(tensorrt_llm.models.GemmaConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.from_hugging_face">(tensorrt_llm.models.GemmaForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.from_hugging_face">(tensorrt_llm.models.GPTConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.from_hugging_face">(tensorrt_llm.models.GPTForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig.from_hugging_face">(tensorrt_llm.models.GPTJConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM.from_hugging_face">(tensorrt_llm.models.GPTJForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.from_hugging_face">(tensorrt_llm.models.LLaMAConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.from_hugging_face">(tensorrt_llm.models.MambaForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig.from_hugging_face">(tensorrt_llm.models.MedusaConfig class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.from_hugging_face">(tensorrt_llm.models.MedusaForCausalLm class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.from_hugging_face">(tensorrt_llm.models.MLLaMAModel class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.from_hugging_face">(tensorrt_llm.models.Phi3ForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.from_hugging_face">(tensorrt_llm.models.PhiForCausalLM class method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.from_json_file">from_json_file() (tensorrt_llm.llmapi.BuildConfig class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_json_file">(tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.from_meta_ckpt">from_meta_ckpt() (tensorrt_llm.models.LLaMAConfig class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_meta_ckpt">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.from_nemo">from_nemo() (tensorrt_llm.models.GPTConfig class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.from_nemo">(tensorrt_llm.models.GPTForCausalLM class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_serialized_engine">from_serialized_engine() (tensorrt_llm.runtime.Session static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.from_string">from_string() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.from_string">(tensorrt_llm.functional.RotaryScalingType static method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP">FusedGatedMLP (class in tensorrt_llm.layers.mlp)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.FusedGatedMLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<h2 id="G">G</h2>
<!-- Layout-only table: its two cells just place the index link lists side by side.
     role="presentation" keeps table/row/cell semantics away from assistive technology
     while leaving the class hooks and visual layout untouched. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP">GatedMLP (class in tensorrt_llm.layers.mlp)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.GatedMLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather">gather() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.gather_context_logits">gather_context_logits (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_context_logits">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_context_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_context_logits">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_context_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.gather_generation_logits">gather_generation_logits (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_generation_logits">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_generation_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_generation_logits">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_generation_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather_last_token_logits">gather_last_token_logits() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather_nd">gather_nd() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gegelu">gegelu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.geglu">geglu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gelu">gelu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gemm_swiglu">gemm_swiglu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.GEMMA2_ADDED_FIELDS">GEMMA2_ADDED_FIELDS (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.gemma2_config">gemma2_config() (tensorrt_llm.models.GemmaConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.GEMMA_ADDED_FIELDS">GEMMA_ADDED_FIELDS (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig">GemmaConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM">GemmaForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.generate">generate() (tensorrt_llm.llmapi.LLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.generate">(tensorrt_llm.runtime.EncDecModelRunner method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.generate">(tensorrt_llm.runtime.ModelRunner method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.generate">(tensorrt_llm.runtime.ModelRunnerCpp method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.generate">(tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate">(tensorrt_llm.runtime.QWenForCausalLMGenerationSession method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_biases">generate_alibi_biases() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_slopes">generate_alibi_slopes() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.generate_async">generate_async() (tensorrt_llm.llmapi.LLM method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_logn_scaling">generate_logn_scaling() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence">GenerationSequence (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession">GenerationSession (class in tensorrt_llm.runtime)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_batch_idx">get_batch_idx() (tensorrt_llm.runtime.GenerationSequence method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.get_block_offsets">get_block_offsets() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.get_config_group">get_config_group() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value">get_first_past_key_value() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.get_modelopt_kv_cache_dtype">get_modelopt_kv_cache_dtype() (tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.get_modelopt_qformat">get_modelopt_qformat() (tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens">get_next_medusa_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.get_num_heads_kv">get_num_heads_kv() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_parent">get_parent() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.get_quant_cfg">get_quant_cfg() (tensorrt_llm.llmapi.QuantConfig method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.get_quant_cfg">(tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.get_rope_index">get_rope_index() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_seq_idx">get_seq_idx() (tensorrt_llm.runtime.GenerationSequence method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_users">get_users() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.get_visual_features">get_visual_features() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.get_weight">get_weight() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gpt_attention">gpt_attention() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin">gpt_attention_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig">GPTConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM">GPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig">GPTJConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM">GPTJForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel">GPTJModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel">GPTModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXForCausalLM">GPTNeoXForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel">GPTNeoXModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gpu_weights_percent">gpu_weights_percent (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.grammar">grammar (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.greedy_decoding">greedy_decoding (tensorrt_llm.llmapi.SamplingParams property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.group_norm">group_norm() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.group_size">group_size (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm">GroupNorm (class in tensorrt_llm.layers.normalization)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.GroupNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gt">gt() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT">GUARANTEED_NO_EVICT (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.guided_decoding">guided_decoding (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams">GuidedDecodingParams (class in tensorrt_llm.llmapi)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="H">H</h2>
<!-- Layout-only table: its two cells just place the index link lists side by side.
     role="presentation" keeps table/row/cell semantics away from assistive technology
     while leaving the class hooks and visual layout untouched. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.handle_per_step">handle_per_step() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput.handle_response">handle_response() (tensorrt_llm.llmapi.RequestOutput method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_affine">has_affine() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_bias">has_bias() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.has_config_group">has_config_group() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_position_embedding">has_position_embedding (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_position_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.has_scale">has_scale() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_token_type_embedding">has_token_type_embedding (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_token_type_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.has_zero_point">has_zero_point (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.head_size">head_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.head_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.hidden_size">hidden_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.hidden_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.hidden_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.hidden_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.host_cache_size">host_cache_size (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="I">I</h2>
<!-- Layout-only table: its two cells just place the index link lists side by side.
     role="presentation" keeps table/row/cell semantics away from assistive technology
     while leaving the class hooks and visual layout untouched. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.identity">identity() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.ignore_eos">ignore_eos (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output">include_stop_str_in_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.index_select">index_select() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.infer_shapes">infer_shapes() (tensorrt_llm.runtime.Session method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_image_encoder">init_image_encoder() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_llm">init_llm() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_processor">init_processor() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.init_tokenizer">init_tokenizer() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.input_timing_cache">input_timing_cache (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.INT8">INT8 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.int_clip">int_clip() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.interpolate">interpolate() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_alibi">is_alibi() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_dynamic">is_dynamic() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.is_gated_activation">is_gated_activation() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.is_gemma_2">is_gemma_2 (tensorrt_llm.models.GemmaConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.is_medusa_mode">is_medusa_mode (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_mrope">is_mrope() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.is_redrafter_mode">is_redrafter_mode (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_rope">is_rope() (tensorrt_llm.functional.PositionEmbeddingType method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_trt_wrapper">is_trt_wrapper() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid">is_valid() (tensorrt_llm.layers.attention.AttentionParams method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid">(tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn">is_valid_cross_attn() (tensorrt_llm.layers.attention.AttentionParams method)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="J">J</h2>
<!-- Layout-only table: its two cells just place the index link lists side by side.
     role="presentation" keeps table/row/cell semantics away from assistive technology
     while leaving the class hooks and visual layout untouched. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.json">json (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.json_object">json_object (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="K">K</h2>
<!-- Layout-only table: its two cells just place the index link lists side by side.
     role="presentation" keeps table/row/cell semantics away from assistive technology
     while leaving the class hooks and visual layout untouched. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams">KeyValueCacheParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo">kv_cache_quant_algo (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.kv_cache_type">kv_cache_type (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.kv_cache_type">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.kv_cache_type">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.kv_dtype">kv_dtype (tensorrt_llm.models.PretrainedConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig">KvCacheConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager">KVCacheManager (class in tensorrt_llm.runtime)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "L". The table below only arranges the entries into columns,
     so it is marked presentational to stop assistive technology from announcing
     it as a data table. Nested lists hold further targets that share the same
     entry name. -->
<h2 id="L">L</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.last_layer">last_layer (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB">LAST_PROCESS_FOR_UB (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.layer_norm">layer_norm() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.layer_quant_mode">layer_quant_mode (tensorrt_llm.llmapi.QuantConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.layer_types">layer_types (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm">LayerNorm (class in tensorrt_llm.layers.normalization)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.LayerNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType">LayerNormPositionType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType">LayerNormType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.learned_absolute">learned_absolute (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.length_penalty">length_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.length_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear">Linear (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.linear">linear (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase">LinearBase (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.llama3">llama3 (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig">LLaMAConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM">LLaMAForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel">LLaMAModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM">LLM (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.load">load() (tensorrt_llm.models.PretrainedModel method)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.load_test_image">load_test_image() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.locate_accepted_draft_tokens">locate_accepted_draft_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.location">location (tensorrt_llm.functional.Tensor property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.log">log() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.log">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.log_softmax">log_softmax() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.logits_post_processor_name">logits_post_processor_name (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessor">LogitsProcessor (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessorList">LogitsProcessorList (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.long_rope">long_rope (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.longrope">longrope (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.lookahead_config">lookahead_config (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.LOOKAHEAD_DECODING">LOOKAHEAD_DECODING (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig">LookaheadDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.lora_config">lora_config (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_plugin">lora_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lora_plugin">lora_plugin() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_target_modules">lora_target_modules (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.low_latency_gemm">low_latency_gemm() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.low_latency_gemm_swiglu">low_latency_gemm_swiglu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lt">lt() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "M". The table below only arranges the entries into columns,
     so it is marked presentational to stop assistive technology from announcing
     it as a data table. Nested lists hold further targets that share the same
     entry name; the "MODEL" and "module" entries are plain-text group labels
     whose links live in their nested lists. -->
<h2 id="M">M</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.make_causal_mask">make_causal_mask() (in module tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mamba_conv1d">mamba_conv1d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_conv1d_plugin">mamba_conv1d_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM">MambaForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.mapping">mapping (tensorrt_llm.runtime.GenerationSession attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.mapping">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mark_output">mark_output() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_scatter">masked_scatter() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_select">masked_select() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.matmul">matmul() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.max">max() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.max">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.max_attention_window">max_attention_window (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_attention_window_size">max_attention_window_size (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_batch_size">max_batch_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_batch_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_beam_width">max_beam_width (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_beam_width">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb">max_cache_storage_gb (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>

      <ul>
        <li><a href="llm-api/reference.html#id1">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_draft_len">max_draft_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_draft_tokens">max_draft_tokens (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len">max_encoder_input_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_input_len">max_input_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_medusa_tokens">max_medusa_tokens (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.max_new_tokens">max_new_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_new_tokens">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size">max_ngram_size (tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_num_tokens">max_num_tokens (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size">max_prompt_embedding_table_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildCacheConfig.max_records">max_records (tensorrt_llm.llmapi.BuildCacheConfig attribute)</a>

      <ul>
        <li><a href="llm-api/reference.html#id2">(tensorrt_llm.llmapi.BuildCacheConfig property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.max_seq_len">max_seq_len (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_sequence_length">max_sequence_length (tensorrt_llm.runtime.ModelRunner property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_sequence_length">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.max_tokens">max_tokens (tensorrt_llm.llmapi.KvCacheConfig property)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.max_tokens">(tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION">MAX_UTILIZATION (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size">max_verification_set_size (tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size">max_window_size (tensorrt_llm.llmapi.LookaheadDecodingConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.maximum">maximum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mean">mean() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mean">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.MEDUSA">MEDUSA (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices">medusa_choices (tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_decode_and_verify">medusa_decode_and_verify() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_paths">medusa_paths (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_position_offsets">medusa_position_offsets (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_temperature">medusa_temperature (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_topks">medusa_topks (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_tree_ids">medusa_tree_ids (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig">MedusaConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig">MedusaDecodingConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm">MedusaForCausalLm (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.min">min() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.min_length">min_length (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.min_length">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.min_tokens">min_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.minimum">minimum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish">Mish (class in tensorrt_llm.layers.activation)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION">MIXED_PRECISION (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel">MLLaMAModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP">MLP (class in tensorrt_llm.layers.mlp)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.MLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType">MLPType (class in tensorrt_llm.functional)</a>
</li>
      <li>
    MODEL

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-arg-MODEL">trtllm-serve command line option</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.model_name">model_name (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig">ModelConfig (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner">ModelRunner (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp">ModelRunnerCpp (class in tensorrt_llm.runtime)</a>
</li>
      <li>
    module

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm">tensorrt_llm</a>, <a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">tensorrt_llm.functional</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">tensorrt_llm.layers.activation</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">tensorrt_llm.layers.attention</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">tensorrt_llm.layers.cast</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">tensorrt_llm.layers.conv</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">tensorrt_llm.layers.embedding</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">tensorrt_llm.layers.linear</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">tensorrt_llm.layers.mlp</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">tensorrt_llm.layers.normalization</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">tensorrt_llm.layers.pooling</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">tensorrt_llm.models</a>
</li>
        <li><a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">tensorrt_llm.plugin</a>
</li>
        <li><a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">tensorrt_llm.quantization</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">tensorrt_llm.runtime</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.modulo">modulo() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType.moe">moe (tensorrt_llm.functional.SideStreamIDType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.monitor_memory">monitor_memory (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM">MPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel">MPTModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.mrope">mrope (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.mrope">(tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.MropeParams">MropeParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mul">mul() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner">MultimodalModelRunner (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.multiply_and_lora">multiply_and_lora() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.multiply_collect">multiply_collect() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "N". The table below only arranges the entries into columns,
     so it is marked presentational to stop assistive technology from announcing
     it as a data table. Nested lists hold further targets that share the same
     entry name. -->
<h2 id="N">N</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.n">n (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.name">name (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.name">(tensorrt_llm.llmapi.CapacitySchedulerPolicy property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.name">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.NATIVE_QUANT_FLOW">NATIVE_QUANT_FLOW (tensorrt_llm.models.GemmaForCausalLM attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.NCCL">NCCL (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.ndim">ndim() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.network">network (tensorrt_llm.functional.Tensor property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids">next_medusa_input_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.NO_QUANT">NO_QUANT (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size">no_repeat_ngram_size (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.no_repeat_ngram_size">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.non_gated_version">non_gated_version() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.NONE">NONE (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.none">none (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode.NONE">NONE (tensorrt_llm.models.SpeculativeDecodingMode attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.nonzero">nonzero() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.NoStatsAvailable">NoStatsAvailable (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.not_op">not_op() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.num_beams">num_beams (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_draft_tokens">num_draft_tokens (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.num_guides">num_guides (tensorrt_llm.llmapi.GuidedDecodingParams property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_heads">num_heads (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_heads">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_heads">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads">num_kv_heads (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_cross_attn_layer">num_kv_heads_per_cross_attn_layer (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_layer">num_kv_heads_per_layer (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_layers">num_layers (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_layers">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_layers">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_layers">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads">num_medusa_heads (tensorrt_llm.llmapi.MedusaDecodingConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_medusa_heads">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_medusa_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.num_return_sequences">num_return_sequences (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.num_return_sequences">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="_cpp_gen/runtime.html#_CPPv48nvinfer1">nvinfer1 (C++ type)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "O". The table below only arranges the entries into columns,
     so it is marked presentational to stop assistive technology from announcing
     it as a data table. -->
<h2 id="O">O</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks">onboard_blocks (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.ONESHOT">ONESHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_and">op_and() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_or">op_or() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.opt_batch_size">opt_batch_size (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.opt_num_tokens">opt_num_tokens (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM">OPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel">OPTModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.outer">outer() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs">output_cum_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_log_probs">output_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_sequence_lengths">output_sequence_lengths (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.output_timing_cache">output_timing_cache (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<!--
  Index section "P" (anchor id "P").
  A two-cell layout table holds the alphabetical entries for this letter.
  A top-level list item links to the first object carrying a given name;
  when several objects share that name, a nested list follows with one
  link per additional owner (the nested link text shows only the owner).
  NOTE(review): this markup looks generator-produced (Sphinx general index);
  confirm before hand-editing, since a rebuild would likely overwrite it.
-->
<h2 id="P">P</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <!-- First column: pad_id through precompute_relative_attention_bias(). -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.pad_id">pad_id (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.pad_id">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.padding">padding (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_kv_cache">paged_kv_cache (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_state">paged_state (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.paged_state">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.permute">permute() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.permute">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM">Phi3ForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3Model">Phi3Model (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM">PhiForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel">PhiModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.plugin_config">plugin_config (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig">PluginConfig (class in tensorrt_llm.plugin)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType">PositionEmbeddingType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.post_layernorm">post_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.postprocess">postprocess() (tensorrt_llm.layers.attention.Attention method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.postprocess">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.postprocess">(tensorrt_llm.layers.linear.Linear method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.pow">pow() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids">pp_communicate_final_output_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens">pp_communicate_new_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.pre_layernorm">pre_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.pre_quant_scale">pre_quant_scale (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.precompute_relative_attention_bias">precompute_relative_attention_bias() (tensorrt_llm.models.DecoderModel method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.precompute_relative_attention_bias">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.precompute_relative_attention_bias">(tensorrt_llm.models.MLLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.precompute_relative_attention_bias">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
      </ul></li>
  </ul></td>
  <!-- Second column: prepare_inputs() through PUSH_MODE. -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs">prepare_inputs() (tensorrt_llm.models.ChatGLMForCausalLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.prepare_inputs">(tensorrt_llm.models.DecoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.prepare_inputs">(tensorrt_llm.models.DiT method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EagleForCausalLM.prepare_inputs">(tensorrt_llm.models.EagleForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.prepare_inputs">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaForCausalLM.prepare_inputs">(tensorrt_llm.models.MambaForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.prepare_inputs">(tensorrt_llm.models.MLLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.prepare_inputs">(tensorrt_llm.models.PretrainedModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_inputs">(tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ReDrafterForCausalLM.prepare_inputs">(tensorrt_llm.models.ReDrafterForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.prepare_inputs">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.prepare_position_ids_for_cogvlm">prepare_position_ids_for_cogvlm() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_recurrent_inputs">prepare_recurrent_inputs() (tensorrt_llm.models.RecurrentGemmaForCausalLM method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.preprocess">preprocess() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.presence_penalty">presence_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.presence_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig">PretrainedConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel">PretrainedModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.EncDecModelRunner.process_input">process_input() (tensorrt_llm.runtime.EncDecModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.process_logits_including_draft">process_logits_including_draft() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.prod">prod() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.profiler">profiler (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.profiling_verbosity">profiling_verbosity (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding">PromptTuningEmbedding (class in tensorrt_llm.layers.embedding)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup">ptuning_setup() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_fuyu">ptuning_setup_fuyu() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_llava_next">ptuning_setup_llava_next() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_phi3">ptuning_setup_phi3() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig.PUSH_MODE">PUSH_MODE (tensorrt_llm.functional.AllReduceConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<!--
  Index section "Q" (anchor id "Q").
  A two-cell layout table holds the alphabetical entries for this letter.
  A top-level list item links to the first object carrying a given name;
  when several objects share that name, a nested list follows with one
  link per additional owner (the nested link text shows only the owner).
  NOTE(review): this markup looks generator-produced (Sphinx general index);
  confirm before hand-editing, since a rebuild would likely overwrite it.
-->
<h2 id="Q">Q</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <!-- First column: quant_algo through quantize(). -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.quant_algo">quant_algo (tensorrt_llm.llmapi.QuantConfig attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.quant_algo">(tensorrt_llm.models.PretrainedConfig property)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.quant_mode">quant_mode (tensorrt_llm.llmapi.QuantConfig property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.quant_mode">(tensorrt_llm.models.PretrainedConfig property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.quant_mode">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.quant_mode">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo">QuantAlgo (class in tensorrt_llm.llmapi)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo">(class in tensorrt_llm.quantization)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig">QuantConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM.quantize">quantize() (tensorrt_llm.models.BaichuanForCausalLM class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.quantize">(tensorrt_llm.models.ChatGLMForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMForCausalLM.quantize">(tensorrt_llm.models.CogVLMForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.quantize">(tensorrt_llm.models.GemmaForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.quantize">(tensorrt_llm.models.GPTForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.quantize">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.quantize">(tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      </ul></li>
  </ul></td>
  <!-- Second column: quantize_and_export() through QWenForCausalLMGenerationSession. -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.quantize_and_export">quantize_and_export() (in module tensorrt_llm.quantization)</a>
</li>
      <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode">QuantMode (class in tensorrt_llm.quantization)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.quick_gelu">quick_gelu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession">QWenForCausalLMGenerationSession (class in tensorrt_llm.runtime)</a>
</li>
  </ul></td>
</tr></table>

<!--
  Index section "R" (anchor id "R").
  A two-cell layout table holds the alphabetical entries for this letter.
  A top-level list item links to the first object carrying a given name;
  when several objects share that name, a nested list follows with one
  link per additional owner (the nested link text shows only the owner).
  NOTE(review): this markup looks generator-produced (Sphinx general index);
  confirm before hand-editing, since a rebuild would likely overwrite it.
-->
<h2 id="R">R</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <!-- First column: rand() through RESIDUAL_RMS_NORM. -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rand">rand() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.random_seed">random_seed (tensorrt_llm.llmapi.CalibConfig attribute)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.random_seed">(tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.random_seed">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.rank">rank() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RecurrentGemmaForCausalLM">RecurrentGemmaForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.recv">recv() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.redrafter_draft_len_per_beam">redrafter_draft_len_per_beam (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.redrafter_num_beams">redrafter_num_beams (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ReDrafterForCausalLM">ReDrafterForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.reduce">reduce() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.reduce_scatter">reduce_scatter() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.GuidedDecodingParams.regex">regex (tensorrt_llm.llmapi.GuidedDecodingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.relative">relative (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.release">release() (tensorrt_llm.models.PretrainedModel method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.relu">relu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.remove_input_padding">remove_input_padding (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.remove_input_padding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.remove_input_padding">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.remove_input_padding">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.reorder_kv_cache_for_beam_search">reorder_kv_cache_for_beam_search() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.repeat_interleave">repeat_interleave() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.repetition_penalty">repetition_penalty (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.repetition_penalty">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.replace_all_uses_with">replace_all_uses_with() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestError">RequestError (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.RequestOutput">RequestOutput (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.requires_calibration">requires_calibration (tensorrt_llm.llmapi.QuantConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.requires_modelopt_quantization">requires_modelopt_quantization (tensorrt_llm.llmapi.QuantConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM">RESIDUAL_RMS_NORM (tensorrt_llm.functional.AllReduceFusionOp attribute)</a>
</li>
  </ul></td>
  <!-- Second column: return_context_logits through runtime. -->
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_context_logits">return_context_logits (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.return_dict">return_dict (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_encoder_output">return_encoder_output (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_generation_logits">return_generation_logits (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_log_probs">return_log_probs (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.return_perf_metrics">return_perf_metrics (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rg_lru">rg_lru() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rms_norm">rms_norm() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm">RmsNorm (class in tensorrt_llm.layers.normalization)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.RmsNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_conv_dim_size">rnn_conv_dim_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_conv_dim_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_head_size">rnn_head_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_head_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.rnn_hidden_size">rnn_hidden_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.rnn_hidden_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaForQuestionAnswering">RobertaForQuestionAnswering (in module tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaForSequenceClassification">RobertaForSequenceClassification (in module tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.RobertaModel">RobertaModel (in module tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox">rope_gpt_neox (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gptj">rope_gptj (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils">RopeEmbeddingUtils (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType">RotaryScalingType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two">rotate_every_two() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half">rotate_half() (tensorrt_llm.functional.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.round">round() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear">RowLinear (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.run">run() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.run">(tensorrt_llm.runtime.Session method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.runtime">runtime (tensorrt_llm.runtime.GenerationSession attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.runtime">(tensorrt_llm.runtime.Session property)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<h2 id="S">S</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig">SamplingConfig (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams">SamplingParams (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.save">save() (tensorrt_llm.llmapi.LLM method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.save_checkpoint">save_checkpoint() (tensorrt_llm.models.PretrainedModel method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.scatter">scatter() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.scatter_nd">scatter_nd() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SchedulerConfig">SchedulerConfig (class in tensorrt_llm.llmapi)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority">secondary_offload_min_priority (tensorrt_llm.llmapi.KvCacheConfig property)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.seed">seed (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.select">select() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.select">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.selective_scan">selective_scan() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.send">send() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.serialize_engine">serialize_engine() (tensorrt_llm.runtime.ModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session">Session (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#c.SET_FROM_OPTIONAL">SET_FROM_OPTIONAL (C macro)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_if_not_exist">set_if_not_exist() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_rank">set_rank() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.set_rel_attn_table">set_rel_attn_table() (tensorrt_llm.layers.attention.Attention method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.set_shapes">set_shapes() (tensorrt_llm.runtime.Session method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.setup">setup() (tensorrt_llm.llmapi.SamplingParams method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.setup">(tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts">setup_fake_prompts() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_qwen2vl">setup_fake_prompts_qwen2vl() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_vila">setup_fake_prompts_vila() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.setup_inputs">setup_inputs() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.shape">shape (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.shape">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.shape">shape() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.SideStreamIDType">SideStreamIDType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sigmoid">sigmoid() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.silu">silu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sin">sin() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.KvCacheConfig.sink_token_length">sink_token_length (tensorrt_llm.llmapi.KvCacheConfig property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.sink_token_length">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.size">size() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.skip_cross_attn_blocks">skip_cross_attn_blocks (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.skip_cross_kv">skip_cross_kv (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.skip_special_tokens">skip_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.slice">slice() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.sliding_window_causal">sliding_window_causal (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.smoothquant_val">smoothquant_val (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softmax">softmax() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softplus">softplus() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens">spaces_between_special_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.SpecDecodingParams">SpecDecodingParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode">speculative_decoding_mode (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.SpeculativeDecodingMode">SpeculativeDecodingMode (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.split">split() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.split">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.split_prompt_by_images">split_prompt_by_images() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sqrt">sqrt() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.sqrt">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.squared_relu">squared_relu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.squeeze">squeeze() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.stack">stack() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.state_dtype">state_dtype (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.state_dtype">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.state_size">state_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.state_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH">STATIC_BATCH (tensorrt_llm.llmapi.CapacitySchedulerPolicy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.step">step() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.stop">stop (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.stop_token_ids">stop_token_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.stop_words_list">stop_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteria">StoppingCriteria (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteriaList">StoppingCriteriaList (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.strongly_typed">strongly_typed (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sub">sub() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sum">sum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.swiglu">swiglu() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="T">T</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.tanh">tanh() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.temperature">temperature (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.temperature">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor">Tensor (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo">TensorInfo (class in tensorrt_llm.runtime)</a>
</li>
      <li>
    tensorrt_llm

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm">module</a>, <a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
      </ul></li>
      <li><a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">tensorrt_llm (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[25]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[28]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[29]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[30]</a>, <a 
href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[31]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[32]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[33]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[34]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[35]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[36]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[37]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[38]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[39]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[40]</a>
</li>
      <li>
    tensorrt_llm.functional

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.activation

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.attention

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.cast

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.conv

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.embedding

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.linear

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.mlp

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.normalization

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.pooling

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.models

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.plugin

      <ul>
        <li><a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.quantization

      <ul>
        <li><a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.runtime

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">module</a>
</li>
      </ul></li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm13batch_managerE">tensorrt_llm::batch_manager (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">[4]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE">tensorrt_llm::batch_manager::kv_cache_manager (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">tensorrt_llm::executor (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[4]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingTypeE">tensorrt_llm::executor::BatchingType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE">tensorrt_llm::executor::BatchingType::kINFLIGHT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE">tensorrt_llm::executor::BatchingType::kSTATIC (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10BeamTokensE">tensorrt_llm::executor::BeamTokens (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10BufferViewE">tensorrt_llm::executor::BufferView (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicyE">tensorrt_llm::executor::CapacitySchedulerPolicy (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy20kGUARANTEED_NO_EVICTE">tensorrt_llm::executor::CapacitySchedulerPolicy::kGUARANTEED_NO_EVICT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy16kMAX_UTILIZATIONE">tensorrt_llm::executor::CapacitySchedulerPolicy::kMAX_UTILIZATION (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy13kSTATIC_BATCHE">tensorrt_llm::executor::CapacitySchedulerPolicy::kSTATIC_BATCH (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationModeE">tensorrt_llm::executor::CommunicationMode (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE">tensorrt_llm::executor::CommunicationMode::kLEADER (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationMode13kORCHESTRATORE">tensorrt_llm::executor::CommunicationMode::kORCHESTRATOR (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationTypeE">tensorrt_llm::executor::CommunicationType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE">tensorrt_llm::executor::CommunicationType::kMPI (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicyE">tensorrt_llm::executor::ContextChunkingPolicy (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy15kEQUAL_PROGRESSE">tensorrt_llm::executor::ContextChunkingPolicy::kEQUAL_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy24kFIRST_COME_FIRST_SERVEDE">tensorrt_llm::executor::ContextChunkingPolicy::kFIRST_COME_FIRST_SERVED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsE">tensorrt_llm::executor::ContextPhaseParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdType">tensorrt_llm::executor::ContextPhaseParams::ContextPhaseParams (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypePv">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERK18ContextPhaseParams">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERR18ContextPhaseParams">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams7deleterEPKv">tensorrt_llm::executor::ContextPhaseParams::deleter (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NKR12tensorrt_llm8executor18ContextPhaseParams17getFirstGenTokensEv">tensorrt_llm::executor::ContextPhaseParams::getFirstGenTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getReqIdEv">tensorrt_llm::executor::ContextPhaseParams::getReqId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8getStateEv">tensorrt_llm::executor::ContextPhaseParams::getState (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getStateEv">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams15mFirstGenTokensE">tensorrt_llm::executor::ContextPhaseParams::mFirstGenTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mReqIdE">tensorrt_llm::executor::ContextPhaseParams::mReqId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mStateE">tensorrt_llm::executor::ContextPhaseParams::mState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERK18ContextPhaseParams">tensorrt_llm::executor::ContextPhaseParams::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERR18ContextPhaseParams">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18ContextPhaseParamseqERK18ContextPhaseParams">tensorrt_llm::executor::ContextPhaseParams::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NO12tensorrt_llm8executor18ContextPhaseParams17popFirstGenTokensEv">tensorrt_llm::executor::ContextPhaseParams::popFirstGenTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams12releaseStateEv">tensorrt_llm::executor::ContextPhaseParams::releaseState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams13RequestIdTypeE">tensorrt_llm::executor::ContextPhaseParams::RequestIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8StatePtrE">tensorrt_llm::executor::ContextPhaseParams::StatePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsD0Ev">tensorrt_llm::executor::ContextPhaseParams::~ContextPhaseParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataTypeE">tensorrt_llm::executor::DataType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBF16E">tensorrt_llm::executor::DataType::kBF16 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE">tensorrt_llm::executor::DataType::kBOOL (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP16E">tensorrt_llm::executor::DataType::kFP16 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP32E">tensorrt_llm::executor::DataType::kFP32 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType4kFP8E">tensorrt_llm::executor::DataType::kFP8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT32E">tensorrt_llm::executor::DataType::kINT32 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT64E">tensorrt_llm::executor::DataType::kINT64 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kINT8E">tensorrt_llm::executor::DataType::kINT8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E">tensorrt_llm::executor::DataType::kUINT8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE">tensorrt_llm::executor::DataType::kUNKNOWN (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfigE">tensorrt_llm::executor::DebugConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig11DebugConfigEbb9StringVec10SizeType32">tensorrt_llm::executor::DebugConfig::DebugConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig20getDebugInputTensorsEv">tensorrt_llm::executor::DebugConfig::getDebugInputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig21getDebugOutputTensorsEv">tensorrt_llm::executor::DebugConfig::getDebugOutputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig19getDebugTensorNamesEv">tensorrt_llm::executor::DebugConfig::getDebugTensorNames (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfig28getDebugTensorsMaxIterationsEv">tensorrt_llm::executor::DebugConfig::getDebugTensorsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig18mDebugInputTensorsE">tensorrt_llm::executor::DebugConfig::mDebugInputTensors (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig19mDebugOutputTensorsE">tensorrt_llm::executor::DebugConfig::mDebugOutputTensors (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig17mDebugTensorNamesE">tensorrt_llm::executor::DebugConfig::mDebugTensorNames (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig26mDebugTensorsMaxIterationsE">tensorrt_llm::executor::DebugConfig::mDebugTensorsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11DebugConfigeqERK11DebugConfig">tensorrt_llm::executor::DebugConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig20setDebugInputTensorsEb">tensorrt_llm::executor::DebugConfig::setDebugInputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig21setDebugOutputTensorsEb">tensorrt_llm::executor::DebugConfig::setDebugOutputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig19setDebugTensorNamesERK9StringVec">tensorrt_llm::executor::DebugConfig::setDebugTensorNames (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig28setDebugTensorsMaxIterationsE10SizeType32">tensorrt_llm::executor::DebugConfig::setDebugTensorsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11DebugConfig9StringVecE">tensorrt_llm::executor::DebugConfig::StringVec (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIterationE">tensorrt_llm::executor::DebugTensorsPerIteration (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration12debugTensorsE">tensorrt_llm::executor::DebugTensorsPerIteration::debugTensors (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration4iterE">tensorrt_llm::executor::DebugTensorsPerIteration::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfigE">tensorrt_llm::executor::DecodingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14DecodingConfigENSt8optionalI12DecodingModeEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI13MedusaChoicesEENSt8optionalI11EagleConfigEE">tensorrt_llm::executor::DecodingConfig::DecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig15getDecodingModeEv">tensorrt_llm::executor::DecodingConfig::getDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig14getEagleConfigEv">tensorrt_llm::executor::DecodingConfig::getEagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig26getLookaheadDecodingConfigEv">tensorrt_llm::executor::DecodingConfig::getLookaheadDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfig16getMedusaChoicesEv">tensorrt_llm::executor::DecodingConfig::getMedusaChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig13mDecodingModeE">tensorrt_llm::executor::DecodingConfig::mDecodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig12mEagleConfigE">tensorrt_llm::executor::DecodingConfig::mEagleConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig24mLookaheadDecodingConfigE">tensorrt_llm::executor::DecodingConfig::mLookaheadDecodingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14mMedusaChoicesE">tensorrt_llm::executor::DecodingConfig::mMedusaChoices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14DecodingConfigeqERK14DecodingConfig">tensorrt_llm::executor::DecodingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig15setDecodingModeERK12DecodingMode">tensorrt_llm::executor::DecodingConfig::setDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig14setEagleConfigERK11EagleConfig">tensorrt_llm::executor::DecodingConfig::setEagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig20setLookaheadDecodingERK23LookaheadDecodingConfig">tensorrt_llm::executor::DecodingConfig::setLookaheadDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14DecodingConfig16setMedusaChoicesERK13MedusaChoices">tensorrt_llm::executor::DecodingConfig::setMedusaChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingModeE">tensorrt_llm::executor::DecodingMode (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode9allBitSetE14UnderlyingType">tensorrt_llm::executor::DecodingMode::allBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode9anyBitSetE14UnderlyingType">tensorrt_llm::executor::DecodingMode::anyBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4AutoEv">tensorrt_llm::executor::DecodingMode::Auto (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode10BeamSearchEv">tensorrt_llm::executor::DecodingMode::BeamSearch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12DecodingModeE14UnderlyingType">tensorrt_llm::executor::DecodingMode::DecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5EagleEv">tensorrt_llm::executor::DecodingMode::Eagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19ExplicitDraftTokensEv">tensorrt_llm::executor::DecodingMode::ExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19ExternalDraftTokensEv">tensorrt_llm::executor::DecodingMode::ExternalDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode8getStateEv">tensorrt_llm::executor::DecodingMode::getState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isAutoEv">tensorrt_llm::executor::DecodingMode::isAuto (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isBeamSearchEv">tensorrt_llm::executor::DecodingMode::isBeamSearch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode7isEagleEv">tensorrt_llm::executor::DecodingMode::isEagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExplicitDraftTokensEv">tensorrt_llm::executor::DecodingMode::isExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExternalDraftTokensEv">tensorrt_llm::executor::DecodingMode::isExternalDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode11isLookaheadEv">tensorrt_llm::executor::DecodingMode::isLookahead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode8isMedusaEv">tensorrt_llm::executor::DecodingMode::isMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopKEv">tensorrt_llm::executor::DecodingMode::isTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode13isTopKandTopPEv">tensorrt_llm::executor::DecodingMode::isTopKandTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isTopKorTopPEv">tensorrt_llm::executor::DecodingMode::isTopKorTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopPEv">tensorrt_llm::executor::DecodingMode::isTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseBanTokensEv">tensorrt_llm::executor::DecodingMode::isUseBanTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode13isUseBanWordsEv">tensorrt_llm::executor::DecodingMode::isUseBanWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUseExplicitEosStopEv">tensorrt_llm::executor::DecodingMode::isUseExplicitEosStop (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode21isUseFrequencyPenaltyEv">tensorrt_llm::executor::DecodingMode::isUseFrequencyPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode18isUseMaxLengthStopEv">tensorrt_llm::executor::DecodingMode::isUseMaxLengthStop (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseMinLengthEv">tensorrt_llm::executor::DecodingMode::isUseMinLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseNoRepeatNgramSizeEv">tensorrt_llm::executor::DecodingMode::isUseNoRepeatNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseOccurrencePenaltyEv">tensorrt_llm::executor::DecodingMode::isUseOccurrencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode12isUsePenaltyEv">tensorrt_llm::executor::DecodingMode::isUsePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUsePresencePenaltyEv">tensorrt_llm::executor::DecodingMode::isUsePresencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseRepetitionPenaltyEv">tensorrt_llm::executor::DecodingMode::isUseRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode17isUseStopCriteriaEv">tensorrt_llm::executor::DecodingMode::isUseStopCriteria (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseStopWordsEv">tensorrt_llm::executor::DecodingMode::isUseStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingMode16isUseTemperatureEv">tensorrt_llm::executor::DecodingMode::isUseTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kAutoE">tensorrt_llm::executor::DecodingMode::kAuto (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode11kBeamSearchE">tensorrt_llm::executor::DecodingMode::kBeamSearch (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6kEagleE">tensorrt_llm::executor::DecodingMode::kEagle (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20kExplicitDraftTokensE">tensorrt_llm::executor::DecodingMode::kExplicitDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20kExternalDraftTokensE">tensorrt_llm::executor::DecodingMode::kExternalDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode10kLookaheadE">tensorrt_llm::executor::DecodingMode::kLookahead (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode7kMedusaE">tensorrt_llm::executor::DecodingMode::kMedusa (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9kNumFlagsE">tensorrt_llm::executor::DecodingMode::kNumFlags (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode21kStandardStopCriteriaE">tensorrt_llm::executor::DecodingMode::kStandardStopCriteria (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopKE">tensorrt_llm::executor::DecodingMode::kTopK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9kTopKTopPE">tensorrt_llm::executor::DecodingMode::kTopKTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopPE">tensorrt_llm::executor::DecodingMode::kTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseBanTokensE">tensorrt_llm::executor::DecodingMode::kUseBanTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12kUseBanWordsE">tensorrt_llm::executor::DecodingMode::kUseBanWords (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19kUseExplicitEosStopE">tensorrt_llm::executor::DecodingMode::kUseExplicitEosStop (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode22kUseFrequencyPenaltiesE">tensorrt_llm::executor::DecodingMode::kUseFrequencyPenalties (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode17kUseMaxLengthStopE">tensorrt_llm::executor::DecodingMode::kUseMaxLengthStop (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseMinLengthE">tensorrt_llm::executor::DecodingMode::kUseMinLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode21kUseNoRepeatNgramSizeE">tensorrt_llm::executor::DecodingMode::kUseNoRepeatNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseOccurrencePenaltiesE">tensorrt_llm::executor::DecodingMode::kUseOccurrencePenalties (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUsePenaltiesE">tensorrt_llm::executor::DecodingMode::kUsePenalties (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode21kUsePresencePenaltiesE">tensorrt_llm::executor::DecodingMode::kUsePresencePenalties (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseRepetitionPenaltiesE">tensorrt_llm::executor::DecodingMode::kUseRepetitionPenalties (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseStopWordsE">tensorrt_llm::executor::DecodingMode::kUseStopWords (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode15kUseTemperatureE">tensorrt_llm::executor::DecodingMode::kUseTemperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode9LookaheadEv">tensorrt_llm::executor::DecodingMode::Lookahead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6MedusaEv">tensorrt_llm::executor::DecodingMode::Medusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode6mStateE">tensorrt_llm::executor::DecodingMode::mState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor12DecodingModeeqERK12DecodingMode">tensorrt_llm::executor::DecodingMode::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode8setBitToE14UnderlyingTypeb">tensorrt_llm::executor::DecodingMode::setBitTo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4TopKEv">tensorrt_llm::executor::DecodingMode::TopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode8TopKTopPEv">tensorrt_llm::executor::DecodingMode::TopKTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode4TopPEv">tensorrt_llm::executor::DecodingMode::TopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode14UnderlyingTypeE">tensorrt_llm::executor::DecodingMode::UnderlyingType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useBanTokensEb">tensorrt_llm::executor::DecodingMode::useBanTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode11useBanWordsEb">tensorrt_llm::executor::DecodingMode::useBanWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode18useExplicitEosStopEb">tensorrt_llm::executor::DecodingMode::useExplicitEosStop (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode19useFrequencyPenaltyEb">tensorrt_llm::executor::DecodingMode::useFrequencyPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode16useMaxLengthStopEb">tensorrt_llm::executor::DecodingMode::useMaxLengthStop (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useMinLengthEb">tensorrt_llm::executor::DecodingMode::useMinLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20useNoRepeatNgramSizeEb">tensorrt_llm::executor::DecodingMode::useNoRepeatNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode22useOccurrencePenaltiesEb">tensorrt_llm::executor::DecodingMode::useOccurrencePenalties (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode18usePresencePenaltyEb">tensorrt_llm::executor::DecodingMode::usePresencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode20useRepetitionPenaltyEb">tensorrt_llm::executor::DecodingMode::useRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode12useStopWordsEb">tensorrt_llm::executor::DecodingMode::useStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12DecodingMode14useTemperatureEb">tensorrt_llm::executor::DecodingMode::useTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detailE">tensorrt_llm::executor::detail (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9DimType64E">tensorrt_llm::executor::detail::DimType64 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::detail::ofITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor">tensorrt_llm::executor::detail::toITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executorE">tensorrt_llm::executor::disagg_executor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator21awaitContextResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::awaitContextResponses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator24awaitGenerationResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::awaitGenerationResponses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator10canEnqueueEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::canEnqueue (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator26DisaggExecutorOrchestratorERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorIN8executor14ExecutorConfigEEERKNSt6vectorIN8executor14ExecutorConfigEEEbb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::DisaggExecutorOrchestrator (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator14enqueueContextERKNSt6vectorIN5texec7RequestEEENSt8optionalIiEEb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::enqueueContext (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator17enqueueGenerationERKNSt6vectorIN5texec7RequestEEERKNSt6vectorI6IdTypeEENSt8optionalIiEEb">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::enqueueGeneration (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator19getContextExecutorsEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::getContextExecutors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator15getGenExecutorsEv">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::getGenExecutors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator5mImplE">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorD0Ev">tensorrt_llm::executor::disagg_executor::DisaggExecutorOrchestrator::~DisaggExecutorOrchestrator (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdE">tensorrt_llm::executor::disagg_executor::ResponseWithId (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId3gidE">tensorrt_llm::executor::disagg_executor::ResponseWithId::gid (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERK14ResponseWithId">tensorrt_llm::executor::disagg_executor::ResponseWithId::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERR14ResponseWithId">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId8responseE">tensorrt_llm::executor::disagg_executor::ResponseWithId::response (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERK14ResponseWithId">tensorrt_llm::executor::disagg_executor::ResponseWithId::ResponseWithId (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERKN12tensorrt_llm8executor8ResponseE6IdType">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERR14ResponseWithId">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERRN12tensorrt_llm8executor8ResponseE6IdType">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdD0Ev">tensorrt_llm::executor::disagg_executor::ResponseWithId::~ResponseWithId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22DisServingRequestStatsE">tensorrt_llm::executor::DisServingRequestStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22DisServingRequestStats17kvCacheTransferMSE">tensorrt_llm::executor::DisServingRequestStats::kvCacheTransferMS (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfigE">tensorrt_llm::executor::DynamicBatchConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig18DynamicBatchConfigEbb10SizeType32NSt6vectorINSt4pairI10SizeType3210SizeType32EEEE">tensorrt_llm::executor::DynamicBatchConfig::DynamicBatchConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig17getBatchSizeTableEv">tensorrt_llm::executor::DynamicBatchConfig::getBatchSizeTable (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig34getDynamicBatchMovingAverageWindowEv">tensorrt_llm::executor::DynamicBatchConfig::getDynamicBatchMovingAverageWindow (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig24getEnableBatchSizeTuningEv">tensorrt_llm::executor::DynamicBatchConfig::getEnableBatchSizeTuning (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig27getEnableMaxNumTokensTuningEv">tensorrt_llm::executor::DynamicBatchConfig::getEnableMaxNumTokensTuning (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22kDefaultBatchSizeTableE">tensorrt_llm::executor::DynamicBatchConfig::kDefaultBatchSizeTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig39kDefaultDynamicBatchMovingAverageWindowE">tensorrt_llm::executor::DynamicBatchConfig::kDefaultDynamicBatchMovingAverageWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig15mBatchSizeTableE">tensorrt_llm::executor::DynamicBatchConfig::mBatchSizeTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig32mDynamicBatchMovingAverageWindowE">tensorrt_llm::executor::DynamicBatchConfig::mDynamicBatchMovingAverageWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22mEnableBatchSizeTuningE">tensorrt_llm::executor::DynamicBatchConfig::mEnableBatchSizeTuning (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig25mEnableMaxNumTokensTuningE">tensorrt_llm::executor::DynamicBatchConfig::mEnableMaxNumTokensTuning (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12EagleChoicesE">tensorrt_llm::executor::EagleChoices (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfigE">tensorrt_llm::executor::EagleConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig19checkPosteriorValueERKNSt8optionalIfEE">tensorrt_llm::executor::EagleConfig::checkPosteriorValue (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig11EagleConfigENSt8optionalI12EagleChoicesEEbNSt8optionalIfEE">tensorrt_llm::executor::EagleConfig::EagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig15getEagleChoicesEv">tensorrt_llm::executor::EagleConfig::getEagleChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig21getPosteriorThresholdEv">tensorrt_llm::executor::EagleConfig::getPosteriorThreshold (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfig16isGreedySamplingEv">tensorrt_llm::executor::EagleConfig::isGreedySampling (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig13mEagleChoicesE">tensorrt_llm::executor::EagleConfig::mEagleChoices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig15mGreedySamplingE">tensorrt_llm::executor::EagleConfig::mGreedySampling (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11EagleConfig19mPosteriorThresholdE">tensorrt_llm::executor::EagleConfig::mPosteriorThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11EagleConfigeqERK11EagleConfig">tensorrt_llm::executor::EagleConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorE">tensorrt_llm::executor::Executor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERK6IdTypeRKNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::Executor::awaitResponses (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt6vectorI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalINSt6chrono12millisecondsEEE">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType">tensorrt_llm::executor::Executor::cancelRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv">tensorrt_llm::executor::Executor::canEnqueueRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request">tensorrt_llm::executor::Executor::enqueueRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE">tensorrt_llm::executor::Executor::enqueueRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEENSt10shared_ptrI5ModelEERK14ExecutorConfig">tensorrt_llm::executor::Executor::Executor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfigRKNSt8optionalINSt3mapINSt6stringE6TensorEEEE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfig">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK8Executor">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERR8Executor">[7]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor22getKVCacheEventManagerEv">tensorrt_llm::executor::Executor::getKVCacheEventManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor21getLatestDebugTensorsEv">tensorrt_llm::executor::Executor::getLatestDebugTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv">tensorrt_llm::executor::Executor::getLatestIterationStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv">tensorrt_llm::executor::Executor::getLatestRequestStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE">tensorrt_llm::executor::Executor::getNumResponsesReady (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor13isParticipantEv">tensorrt_llm::executor::Executor::isParticipant (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor5mImplE">tensorrt_llm::executor::Executor::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutoraSERK8Executor">tensorrt_llm::executor::Executor::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutoraSERR8Executor">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv">tensorrt_llm::executor::Executor::shutdown (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev">tensorrt_llm::executor::Executor::~Executor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfigE">tensorrt_llm::executor::ExecutorConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE10SizeType3215SchedulerConfig13KvCacheConfigbb10SizeType3210SizeType3212BatchingTypeNSt8optionalI10SizeType32EENSt8optionalI10SizeType32EENSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI25LogitsPostProcessorConfigEENSt8optionalI14DecodingConfigEEfNSt8optionalI10SizeType32EERK29ExtendedRuntimePerfKnobConfigNSt8optionalI11DebugConfigEE10SizeType328uint64_tNSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI20GuidedDecodingConfigEE">tensorrt_llm::executor::ExecutorConfig::ExecutorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv">tensorrt_llm::executor::ExecutorConfig::getBatchingType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig14getDebugConfigEv">tensorrt_llm::executor::ExecutorConfig::getDebugConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getDecodingConfigEv">tensorrt_llm::executor::ExecutorConfig::getDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv">tensorrt_llm::executor::ExecutorConfig::getEnableChunkedContext (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig32getExtendedRuntimePerfKnobConfigEv">tensorrt_llm::executor::ExecutorConfig::getExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getGpuWeightsPercentEv">tensorrt_llm::executor::ExecutorConfig::getGpuWeightsPercent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getGuidedDecodingConfigEv">tensorrt_llm::executor::ExecutorConfig::getGuidedDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getIterStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getKvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig19getKvCacheConfigRefEv">tensorrt_llm::executor::ExecutorConfig::getKvCacheConfigRef (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getLogitsPostProcessorConfigEv">tensorrt_llm::executor::ExecutorConfig::getLogitsPostProcessorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBatchSizeEv">tensorrt_llm::executor::ExecutorConfig::getMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv">tensorrt_llm::executor::ExecutorConfig::getMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxNumTokensEv">tensorrt_llm::executor::ExecutorConfig::getMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxQueueSizeEv">tensorrt_llm::executor::ExecutorConfig::getMaxQueueSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getMaxSeqIdleMicrosecondsEv">tensorrt_llm::executor::ExecutorConfig::getMaxSeqIdleMicroseconds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv">tensorrt_llm::executor::ExecutorConfig::getNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv">tensorrt_llm::executor::ExecutorConfig::getParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getPeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig19getRecvPollPeriodMsEv">tensorrt_llm::executor::ExecutorConfig::getRecvPollPeriodMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getRequestStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv">tensorrt_llm::executor::ExecutorConfig::getSchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21getSchedulerConfigRefEv">tensorrt_llm::executor::ExecutorConfig::getSchedulerConfigRef (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getSpecDecConfigEv">tensorrt_llm::executor::ExecutorConfig::getSpecDecConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultIterStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::kDefaultIterStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultMaxSeqIdleMicrosecondsE">tensorrt_llm::executor::ExecutorConfig::kDefaultMaxSeqIdleMicroseconds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig33kDefaultRequestStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::kDefaultRequestStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE">tensorrt_llm::executor::ExecutorConfig::mBatchingType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig12mDebugConfigE">tensorrt_llm::executor::ExecutorConfig::mDebugConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mDecodingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE">tensorrt_llm::executor::ExecutorConfig::mEnableChunkedContext (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig30mExtendedRuntimePerfKnobConfigE">tensorrt_llm::executor::ExecutorConfig::mExtendedRuntimePerfKnobConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mGpuWeightsPercentE">tensorrt_llm::executor::ExecutorConfig::mGpuWeightsPercent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mGuidedDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mGuidedDecodingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mIterStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mKvCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mLogitsPostProcessorConfigE">tensorrt_llm::executor::ExecutorConfig::mLogitsPostProcessorConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBatchSizeE">tensorrt_llm::executor::ExecutorConfig::mMaxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE">tensorrt_llm::executor::ExecutorConfig::mMaxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxNumTokensE">tensorrt_llm::executor::ExecutorConfig::mMaxNumTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxQueueSizeE">tensorrt_llm::executor::ExecutorConfig::mMaxQueueSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mMaxSeqIdleMicrosecondsE">tensorrt_llm::executor::ExecutorConfig::mMaxSeqIdleMicroseconds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE">tensorrt_llm::executor::ExecutorConfig::mNormalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE">tensorrt_llm::executor::ExecutorConfig::mParallelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mPeftCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17mRecvPollPeriodMsE">tensorrt_llm::executor::ExecutorConfig::mRecvPollPeriodMs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mRequestStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE">tensorrt_llm::executor::ExecutorConfig::mSchedulerConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mSpeculativeDecodingConfigE">tensorrt_llm::executor::ExecutorConfig::mSpeculativeDecodingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType">tensorrt_llm::executor::ExecutorConfig::setBatchingType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14setDebugConfigERK11DebugConfig">tensorrt_llm::executor::ExecutorConfig::setDebugConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setDecodingConfigERK14DecodingConfig">tensorrt_llm::executor::ExecutorConfig::setDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb">tensorrt_llm::executor::ExecutorConfig::setEnableChunkedContext (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig32setExtendedRuntimePerfKnobConfigERK29ExtendedRuntimePerfKnobConfig">tensorrt_llm::executor::ExecutorConfig::setExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setGpuWeightsPercentERKf">tensorrt_llm::executor::ExecutorConfig::setGpuWeightsPercent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setGuidedDecodingConfigERK20GuidedDecodingConfig">tensorrt_llm::executor::ExecutorConfig::setGuidedDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setIterStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig">tensorrt_llm::executor::ExecutorConfig::setKvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setLogitsPostProcessorConfigERK25LogitsPostProcessorConfig">tensorrt_llm::executor::ExecutorConfig::setLogitsPostProcessorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBatchSizeE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxNumTokensE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxQueueSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::ExecutorConfig::setMaxQueueSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setMaxSeqIdleMicrosecondsE8uint64_t">tensorrt_llm::executor::ExecutorConfig::setMaxSeqIdleMicroseconds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb">tensorrt_llm::executor::ExecutorConfig::setNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig">tensorrt_llm::executor::ExecutorConfig::setParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig">tensorrt_llm::executor::ExecutorConfig::setPeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig19setRecvPollPeriodMsERK10SizeType32">tensorrt_llm::executor::ExecutorConfig::setRecvPollPeriodMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE10SizeType32">tensorrt_llm::executor::ExecutorConfig::setRequestStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig">tensorrt_llm::executor::ExecutorConfig::setSchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setSpecDecConfigERK25SpeculativeDecodingConfig">tensorrt_llm::executor::ExecutorConfig::setSpecDecConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig29ExtendedRuntimePerfKnobConfigEbbb10SizeType32">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::ExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21getCudaGraphCacheSizeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getCudaGraphCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16getCudaGraphModeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getCudaGraphMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27getEnableContextFMHAFP32AccEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getEnableContextFMHAFP32Acc (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17getMultiBlockModeEv">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::getMultiBlockMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig19mCudaGraphCacheSizeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mCudaGraphCacheSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig14mCudaGraphModeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mCudaGraphMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig25mEnableContextFMHAFP32AccE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mEnableContextFMHAFP32Acc (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig15mMultiBlockModeE">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::mMultiBlockMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigeqERK29ExtendedRuntimePerfKnobConfig">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21setCudaGraphCacheSizeE10SizeType32">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setCudaGraphCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16setCudaGraphModeEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setCudaGraphMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27setEnableContextFMHAFP32AccEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setEnableContextFMHAFP32Acc (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17setMultiBlockModeEb">tensorrt_llm::executor::ExtendedRuntimePerfKnobConfig::setMultiBlockMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfigE">tensorrt_llm::executor::ExternalDraftTokensConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig25ExternalDraftTokensConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEERKNSt8optionalIbEE">tensorrt_llm::executor::ExternalDraftTokensConfig::ExternalDraftTokensConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig22getAcceptanceThresholdEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getAcceptanceThreshold (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig13getFastLogitsEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getFastLogits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getLogitsEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getLogits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getTokensEv">tensorrt_llm::executor::ExternalDraftTokensConfig::getTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig20mAcceptanceThresholdE">tensorrt_llm::executor::ExternalDraftTokensConfig::mAcceptanceThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig11mFastLogitsE">tensorrt_llm::executor::ExternalDraftTokensConfig::mFastLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mLogitsE">tensorrt_llm::executor::ExternalDraftTokensConfig::mLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mTokensE">tensorrt_llm::executor::ExternalDraftTokensConfig::mTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReasonE">tensorrt_llm::executor::FinishReason (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason10kCANCELLEDE">tensorrt_llm::executor::FinishReason::kCANCELLED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason7kEND_IDE">tensorrt_llm::executor::FinishReason::kEND_ID (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason7kLENGTHE">tensorrt_llm::executor::FinishReason::kLENGTH (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason13kNOT_FINISHEDE">tensorrt_llm::executor::FinishReason::kNOT_FINISHED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason11kSTOP_WORDSE">tensorrt_llm::executor::FinishReason::kSTOP_WORDS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12FinishReason10kTIMED_OUTE">tensorrt_llm::executor::FinishReason::kTIMED_OUT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9FloatTypeE">tensorrt_llm::executor::FloatType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfigE">tensorrt_llm::executor::GuidedDecodingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig10getBackendEv">tensorrt_llm::executor::GuidedDecodingConfig::getBackend (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getEncodedVocabEv">tensorrt_llm::executor::GuidedDecodingConfig::getEncodedVocab (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getStopTokenIdsEv">tensorrt_llm::executor::GuidedDecodingConfig::getStopTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getTokenizerStrEv">tensorrt_llm::executor::GuidedDecodingConfig::getTokenizerStr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackendE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingBackend (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackend9kXGRAMMARE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingBackend::kXGRAMMAR (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig20GuidedDecodingConfigE21GuidedDecodingBackendNSt8optionalINSt6vectorINSt6stringEEEEENSt8optionalINSt6stringEEENSt8optionalINSt6vectorI11TokenIdTypeEEEE">tensorrt_llm::executor::GuidedDecodingConfig::GuidedDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig8mBackendE">tensorrt_llm::executor::GuidedDecodingConfig::mBackend (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mEncodedVocabE">tensorrt_llm::executor::GuidedDecodingConfig::mEncodedVocab (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mStopTokenIdsE">tensorrt_llm::executor::GuidedDecodingConfig::mStopTokenIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mTokenizerStrE">tensorrt_llm::executor::GuidedDecodingConfig::mTokenizerStr (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfigeqERK20GuidedDecodingConfig">tensorrt_llm::executor::GuidedDecodingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig10setBackendERK21GuidedDecodingBackend">tensorrt_llm::executor::GuidedDecodingConfig::setBackend (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setEncodedVocabERKNSt6vectorINSt6stringEEE">tensorrt_llm::executor::GuidedDecodingConfig::setEncodedVocab (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setStopTokenIdsERKNSt6vectorI11TokenIdTypeEE">tensorrt_llm::executor::GuidedDecodingConfig::setStopTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setTokenizerStrERKNSt6stringE">tensorrt_llm::executor::GuidedDecodingConfig::setTokenizerStr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig8validateEv">tensorrt_llm::executor::GuidedDecodingConfig::validate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParamsE">tensorrt_llm::executor::GuidedDecodingParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams8getGuideEv">tensorrt_llm::executor::GuidedDecodingParams::getGuide (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams12getGuideTypeEv">tensorrt_llm::executor::GuidedDecodingParams::getGuideType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams20GuidedDecodingParamsE9GuideTypeNSt8optionalINSt6stringEEE">tensorrt_llm::executor::GuidedDecodingParams::GuidedDecodingParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideTypeE">tensorrt_llm::executor::GuidedDecodingParams::GuideType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType13kEBNF_GRAMMARE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kEBNF_GRAMMAR (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType5kJSONE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kJSON (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType12kJSON_SCHEMAE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kJSON_SCHEMA (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType6kREGEXE">tensorrt_llm::executor::GuidedDecodingParams::GuideType::kREGEX (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams6mGuideE">tensorrt_llm::executor::GuidedDecodingParams::mGuide (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams10mGuideTypeE">tensorrt_llm::executor::GuidedDecodingParams::mGuideType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParamseqERK20GuidedDecodingParams">tensorrt_llm::executor::GuidedDecodingParams::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6IdTypeE">tensorrt_llm::executor::IdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE">tensorrt_llm::executor::InflightBatchingStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats26avgNumDecodedTokensPerIterE">tensorrt_llm::executor::InflightBatchingStats::avgNumDecodedTokensPerIter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE">tensorrt_llm::executor::InflightBatchingStats::microBatchId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE">tensorrt_llm::executor::InflightBatchingStats::numContextRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE">tensorrt_llm::executor::InflightBatchingStats::numCtxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE">tensorrt_llm::executor::InflightBatchingStats::numGenRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE">tensorrt_llm::executor::InflightBatchingStats::numPausedRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::InflightBatchingStats::numScheduledRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStatsE">tensorrt_llm::executor::IterationStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE">tensorrt_llm::executor::IterationStats::cpuMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17crossKvCacheStatsE">tensorrt_llm::executor::IterationStats::crossKvCacheStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE">tensorrt_llm::executor::IterationStats::gpuMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE">tensorrt_llm::executor::IterationStats::inflightBatchingStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats4iterE">tensorrt_llm::executor::IterationStats::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats13iterLatencyMSE">tensorrt_llm::executor::IterationStats::iterLatencyMS (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE">tensorrt_llm::executor::IterationStats::kvCacheStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19maxBatchSizeRuntimeE">tensorrt_llm::executor::IterationStats::maxBatchSizeRuntime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats18maxBatchSizeStaticE">tensorrt_llm::executor::IterationStats::maxBatchSizeStatic (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats28maxBatchSizeTunerRecommendedE">tensorrt_llm::executor::IterationStats::maxBatchSizeTunerRecommended (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE">tensorrt_llm::executor::IterationStats::maxNumActiveRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19maxNumTokensRuntimeE">tensorrt_llm::executor::IterationStats::maxNumTokensRuntime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats18maxNumTokensStaticE">tensorrt_llm::executor::IterationStats::maxNumTokensStatic (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats28maxNumTokensTunerRecommendedE">tensorrt_llm::executor::IterationStats::maxNumTokensTunerRecommended (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats31newActiveRequestsQueueLatencyMSE">tensorrt_llm::executor::IterationStats::newActiveRequestsQueueLatencyMS (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE">tensorrt_llm::executor::IterationStats::numActiveRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20numCompletedRequestsE">tensorrt_llm::executor::IterationStats::numCompletedRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20numNewActiveRequestsE">tensorrt_llm::executor::IterationStats::numNewActiveRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17numQueuedRequestsE">tensorrt_llm::executor::IterationStats::numQueuedRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE">tensorrt_llm::executor::IterationStats::pinnedMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE">tensorrt_llm::executor::IterationStats::staticBatchingStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE">tensorrt_llm::executor::IterationStats::timestamp (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13IterationTypeE">tensorrt_llm::executor::IterationType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerializationE">tensorrt_llm::executor::JsonSerialization (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats">tensorrt_llm::executor::JsonSerialization::toJsonStr (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8kv_cacheE">tensorrt_llm::executor::kv_cache (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfigE">tensorrt_llm::executor::KvCacheConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig34fillEmptyFieldsFromRuntimeDefaultsEN12tensorrt_llm7runtime15RuntimeDefaultsE">tensorrt_llm::executor::KvCacheConfig::fillEmptyFieldsFromRuntimeDefaults (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig23getCrossKvCacheFractionEv">tensorrt_llm::executor::KvCacheConfig::getCrossKvCacheFraction (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv">tensorrt_llm::executor::KvCacheConfig::getEnableBlockReuse (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getEventBufferMaxSizeEv">tensorrt_llm::executor::KvCacheConfig::getEventBufferMaxSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv">tensorrt_llm::executor::KvCacheConfig::getFreeGpuMemoryFraction (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::KvCacheConfig::getHostCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getMaxAttentionWindowVecEv">tensorrt_llm::executor::KvCacheConfig::getMaxAttentionWindowVec (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv">tensorrt_llm::executor::KvCacheConfig::getMaxTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv">tensorrt_llm::executor::KvCacheConfig::getOnboardBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig30getSecondaryOffloadMinPriorityEv">tensorrt_llm::executor::KvCacheConfig::getSecondaryOffloadMinPriority (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv">tensorrt_llm::executor::KvCacheConfig::getSinkTokenLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI10SizeType32EERKNSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEbRKNSt8optionalI9FloatTypeEENSt8optionalI17RetentionPriorityEE6size_tRKNSt8optionalIN12tensorrt_llm7runtime15RuntimeDefaultsEEE">tensorrt_llm::executor::KvCacheConfig::KvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21mCrossKvCacheFractionE">tensorrt_llm::executor::KvCacheConfig::mCrossKvCacheFraction (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE">tensorrt_llm::executor::KvCacheConfig::mEnableBlockReuse (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mEventBufferMaxSizeE">tensorrt_llm::executor::KvCacheConfig::mEventBufferMaxSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE">tensorrt_llm::executor::KvCacheConfig::mFreeGpuMemoryFraction (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::KvCacheConfig::mHostCacheSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mMaxAttentionWindowVecE">tensorrt_llm::executor::KvCacheConfig::mMaxAttentionWindowVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE">tensorrt_llm::executor::KvCacheConfig::mMaxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE">tensorrt_llm::executor::KvCacheConfig::mOnboardBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig28mSecondaryOffloadMinPriorityE">tensorrt_llm::executor::KvCacheConfig::mSecondaryOffloadMinPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE">tensorrt_llm::executor::KvCacheConfig::mSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig23setCrossKvCacheFractionE9FloatType">tensorrt_llm::executor::KvCacheConfig::setCrossKvCacheFraction (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19setEnableBlockReuseEb">tensorrt_llm::executor::KvCacheConfig::setEnableBlockReuse (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setEventBufferMaxSizeE6size_t">tensorrt_llm::executor::KvCacheConfig::setEventBufferMaxSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setFreeGpuMemoryFractionE9FloatType">tensorrt_llm::executor::KvCacheConfig::setFreeGpuMemoryFraction (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setHostCacheSizeE6size_t">tensorrt_llm::executor::KvCacheConfig::setHostCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setMaxAttentionWindowVecENSt6vectorI10SizeType32EE">tensorrt_llm::executor::KvCacheConfig::setMaxAttentionWindowVec (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig12setMaxTokensE10SizeType32">tensorrt_llm::executor::KvCacheConfig::setMaxTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setOnboardBlocksEb">tensorrt_llm::executor::KvCacheConfig::setOnboardBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig30setSecondaryOffloadMinPriorityENSt8optionalI17RetentionPriorityEE">tensorrt_llm::executor::KvCacheConfig::setSecondaryOffloadMinPriority (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig18setSinkTokenLengthE10SizeType32">tensorrt_llm::executor::KvCacheConfig::setSinkTokenLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheCreatedDataE">tensorrt_llm::executor::KVCacheCreatedData (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheCreatedData22numBlocksPerCacheLevelE">tensorrt_llm::executor::KVCacheCreatedData::numBlocksPerCacheLevel (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEventE">tensorrt_llm::executor::KVCacheEvent (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent4dataE">tensorrt_llm::executor::KVCacheEvent::data (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent7eventIdE">tensorrt_llm::executor::KVCacheEvent::eventId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KVCacheEvent12KVCacheEventE6IdType16KVCacheEventData">tensorrt_llm::executor::KVCacheEvent::KVCacheEvent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDataE">tensorrt_llm::executor::KVCacheEventData (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor16KVCacheEventDiffE">tensorrt_llm::executor::KVCacheEventDiff (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8newValueE">tensorrt_llm::executor::KVCacheEventDiff::newValue (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8oldValueE">tensorrt_llm::executor::KVCacheEventDiff::oldValue (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManagerE">tensorrt_llm::executor::KVCacheEventManager (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager15getLatestEventsENSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::KVCacheEventManager::getLatestEvents (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager19KVCacheEventManagerENSt10shared_ptrIN12tensorrt_llm13batch_manager16kv_cache_manager18BaseKVCacheManagerEEE">tensorrt_llm::executor::KVCacheEventManager::KVCacheEventManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19KVCacheEventManager14kvCacheManagerE">tensorrt_llm::executor::KVCacheEventManager::kvCacheManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheRemovedDataE">tensorrt_llm::executor::KVCacheRemovedData (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheRemovedData11blockHashesE">tensorrt_llm::executor::KVCacheRemovedData::blockHashes (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfigE">tensorrt_llm::executor::KvCacheRetentionConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig19getDecodeDurationMsEv">tensorrt_llm::executor::KvCacheRetentionConfig::getDecodeDurationMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig26getDecodeRetentionPriorityEv">tensorrt_llm::executor::KvCacheRetentionConfig::getDecodeRetentionPriority (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig36getPerBlockRetentionPriorityDurationE10SizeType3210SizeType32">tensorrt_llm::executor::KvCacheRetentionConfig::getPerBlockRetentionPriorityDuration (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig29getTokenRangeRetentionConfigsEv">tensorrt_llm::executor::KvCacheRetentionConfig::getTokenRangeRetentionConfigs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25kDefaultRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kDefaultRetentionPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMaxRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kMaxRetentionPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMinRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::kMinRetentionPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigERKNSt6vectorI25TokenRangeRetentionConfigEE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::KvCacheRetentionConfig::KvCacheRetentionConfig (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigEv">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig17mDecodeDurationMsE">tensorrt_llm::executor::KvCacheRetentionConfig::mDecodeDurationMs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig24mDecodeRetentionPriorityE">tensorrt_llm::executor::KvCacheRetentionConfig::mDecodeRetentionPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig27mTokenRangeRetentionConfigsE">tensorrt_llm::executor::KvCacheRetentionConfig::mTokenRangeRetentionConfigs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10durationMsE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::durationMs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigeqERK25TokenRangeRetentionConfig">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8priorityE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::priority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8tokenEndE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::tokenEnd (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig25TokenRangeRetentionConfigE10SizeType32NSt8optionalI10SizeType32EE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::TokenRangeRetentionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10tokenStartE">tensorrt_llm::executor::KvCacheRetentionConfig::TokenRangeRetentionConfig::tokenStart (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStatsE">tensorrt_llm::executor::KvCacheStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats14allocNewBlocksE">tensorrt_llm::executor::KvCacheStats::allocNewBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats16allocTotalBlocksE">tensorrt_llm::executor::KvCacheStats::allocTotalBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12cacheHitRateE">tensorrt_llm::executor::KvCacheStats::cacheHitRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE">tensorrt_llm::executor::KvCacheStats::freeNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE">tensorrt_llm::executor::KvCacheStats::maxNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12missedBlocksE">tensorrt_llm::executor::KvCacheStats::missedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12reusedBlocksE">tensorrt_llm::executor::KvCacheStats::reusedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE">tensorrt_llm::executor::KvCacheStats::tokensPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE">tensorrt_llm::executor::KvCacheStats::usedNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockDataE">tensorrt_llm::executor::KVCacheStoredBlockData (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData9blockHashE">tensorrt_llm::executor::KVCacheStoredBlockData::blockHash (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData10cacheLevelE">tensorrt_llm::executor::KVCacheStoredBlockData::cacheLevel (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData22KVCacheStoredBlockDataE6IdTypeN12tensorrt_llm7runtime15VecUniqueTokensEN12tensorrt_llm7runtime14LoraTaskIdTypeE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheStoredBlockData::KVCacheStoredBlockData (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6loraIdE">tensorrt_llm::executor::KVCacheStoredBlockData::loraId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData8priorityE">tensorrt_llm::executor::KVCacheStoredBlockData::priority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6tokensE">tensorrt_llm::executor::KVCacheStoredBlockData::tokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredDataE">tensorrt_llm::executor::KVCacheStoredData (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredData6blocksE">tensorrt_llm::executor::KVCacheStoredData::blocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17KVCacheStoredData10parentHashE">tensorrt_llm::executor::KVCacheStoredData::parentHash (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedDataE">tensorrt_llm::executor::KVCacheUpdatedData (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData9blockHashE">tensorrt_llm::executor::KVCacheUpdatedData::blockHash (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData10cacheLevelE">tensorrt_llm::executor::KVCacheUpdatedData::cacheLevel (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData17cacheLevelUpdatedE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheUpdatedData::cacheLevelUpdated (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData18KVCacheUpdatedDataE6IdType">tensorrt_llm::executor::KVCacheUpdatedData::KVCacheUpdatedData (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData8priorityE">tensorrt_llm::executor::KVCacheUpdatedData::priority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData15priorityUpdatedE10SizeType3210SizeType32">tensorrt_llm::executor::KVCacheUpdatedData::priorityUpdated (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE">tensorrt_llm::executor::LogitsPostProcessor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor26LogitsPostProcessorBatchedE">tensorrt_llm::executor::LogitsPostProcessorBatched (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfigE">tensorrt_llm::executor::LogitsPostProcessorConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig19getProcessorBatchedEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getProcessorBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig15getProcessorMapEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getProcessorMap (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig12getReplicateEv">tensorrt_llm::executor::LogitsPostProcessorConfig::getReplicate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig25LogitsPostProcessorConfigENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI26LogitsPostProcessorBatchedEEb">tensorrt_llm::executor::LogitsPostProcessorConfig::LogitsPostProcessorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig17mProcessorBatchedE">tensorrt_llm::executor::LogitsPostProcessorConfig::mProcessorBatched (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig13mProcessorMapE">tensorrt_llm::executor::LogitsPostProcessorConfig::mProcessorMap (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig10mReplicateE">tensorrt_llm::executor::LogitsPostProcessorConfig::mReplicate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig19setProcessorBatchedERK26LogitsPostProcessorBatched">tensorrt_llm::executor::LogitsPostProcessorConfig::setProcessorBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig15setProcessorMapERK22LogitsPostProcessorMap">tensorrt_llm::executor::LogitsPostProcessorConfig::setProcessorMap (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig12setReplicateEb">tensorrt_llm::executor::LogitsPostProcessorConfig::setReplicate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE">tensorrt_llm::executor::LogitsPostProcessorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfigE">tensorrt_llm::executor::LookaheadDecodingConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig28calculateSpeculativeResourceEv">tensorrt_llm::executor::LookaheadDecodingConfig::calculateSpeculativeResource (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig3getEv">tensorrt_llm::executor::LookaheadDecodingConfig::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig12getNgramSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig22getVerificationSetSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getVerificationSetSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig13getWindowSizeEv">tensorrt_llm::executor::LookaheadDecodingConfig::getWindowSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig4isLEERK23LookaheadDecodingConfig">tensorrt_llm::executor::LookaheadDecodingConfig::isLE (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig7isLegalE10SizeType3210SizeType3210SizeType32">tensorrt_llm::executor::LookaheadDecodingConfig::isLegal (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigE10SizeType3210SizeType3210SizeType32">tensorrt_llm::executor::LookaheadDecodingConfig::LookaheadDecodingConfig (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigEv">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig10mNgramSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig20mVerificationSetSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mVerificationSetSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig11mWindowSizeE">tensorrt_llm::executor::LookaheadDecodingConfig::mWindowSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfigeqERK23LookaheadDecodingConfig">tensorrt_llm::executor::LookaheadDecodingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfigE">tensorrt_llm::executor::LoraConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv">tensorrt_llm::executor::LoraConfig::getConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv">tensorrt_llm::executor::LoraConfig::getTaskId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv">tensorrt_llm::executor::LoraConfig::getWeights (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE">tensorrt_llm::executor::LoraConfig::LoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE">tensorrt_llm::executor::LoraConfig::mConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE">tensorrt_llm::executor::LoraConfig::mTaskId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE">tensorrt_llm::executor::LoraConfig::mWeights (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13MedusaChoicesE">tensorrt_llm::executor::MedusaChoices (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryTypeE">tensorrt_llm::executor::MemoryType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE">tensorrt_llm::executor::MemoryType::kCPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE">tensorrt_llm::executor::MemoryType::kCPU_PINNED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType15kCPU_PINNEDPOOLE">tensorrt_llm::executor::MemoryType::kCPU_PINNEDPOOL (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE">tensorrt_llm::executor::MemoryType::kGPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE">tensorrt_llm::executor::MemoryType::kUNKNOWN (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME">tensorrt_llm::executor::MemoryType::kUVM (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16MillisecondsTypeE">tensorrt_llm::executor::MillisecondsType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelTypeE">tensorrt_llm::executor::ModelType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE">tensorrt_llm::executor::ModelType::kDECODER_ONLY (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType16kENCODER_DECODERE">tensorrt_llm::executor::ModelType::kENCODER_DECODER (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType13kENCODER_ONLYE">tensorrt_llm::executor::ModelType::kENCODER_ONLY (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfigE">tensorrt_llm::executor::MropeConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11MropeConfig22getMRopePositionDeltasEv">tensorrt_llm::executor::MropeConfig::getMRopePositionDeltas (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor11MropeConfig20getMRopeRotarySinCosEv">tensorrt_llm::executor::MropeConfig::getMRopeRotarySinCos (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig20mMRopePositionDeltasE">tensorrt_llm::executor::MropeConfig::mMRopePositionDeltas (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig18mMRopeRotarySinCosE">tensorrt_llm::executor::MropeConfig::mMRopeRotarySinCos (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11MropeConfig11MropeConfigE6Tensor10SizeType32">tensorrt_llm::executor::MropeConfig::MropeConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE21ContextChunkingPolicy">tensorrt_llm::executor::operator&lt;&lt; (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE23CapacitySchedulerPolicy">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfigE">tensorrt_llm::executor::OrchestratorConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getIsOrchestratorEv">tensorrt_llm::executor::OrchestratorConfig::getIsOrchestrator (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getOrchLeaderCommEv">tensorrt_llm::executor::OrchestratorConfig::getOrchLeaderComm (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getSpawnProcessesEv">tensorrt_llm::executor::OrchestratorConfig::getSpawnProcesses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig23getWorkerExecutablePathEv">tensorrt_llm::executor::OrchestratorConfig::getWorkerExecutablePath (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mIsOrchestratorE">tensorrt_llm::executor::OrchestratorConfig::mIsOrchestrator (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mOrchLeaderCommE">tensorrt_llm::executor::OrchestratorConfig::mOrchLeaderComm (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mSpawnProcessesE">tensorrt_llm::executor::OrchestratorConfig::mSpawnProcesses (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig21mWorkerExecutablePathE">tensorrt_llm::executor::OrchestratorConfig::mWorkerExecutablePath (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig18OrchestratorConfigEbNSt6stringENSt10shared_ptrIN3mpi7MpiCommEEEb">tensorrt_llm::executor::OrchestratorConfig::OrchestratorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setIsOrchestratorEb">tensorrt_llm::executor::OrchestratorConfig::setIsOrchestrator (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setOrchLeaderCommERKNSt10shared_ptrIN3mpi7MpiCommEEE">tensorrt_llm::executor::OrchestratorConfig::setOrchLeaderComm (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setSpawnProcessesEb">tensorrt_llm::executor::OrchestratorConfig::setSpawnProcesses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18OrchestratorConfig23setWorkerExecutablePathERKNSt6stringE">tensorrt_llm::executor::OrchestratorConfig::setWorkerExecutablePath (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfigE">tensorrt_llm::executor::OutputConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE">tensorrt_llm::executor::OutputConfig::excludeInputFromOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbbbb">tensorrt_llm::executor::OutputConfig::OutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE">tensorrt_llm::executor::OutputConfig::returnContextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig19returnEncoderOutputE">tensorrt_llm::executor::OutputConfig::returnEncoderOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE">tensorrt_llm::executor::OutputConfig::returnGenerationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE">tensorrt_llm::executor::OutputConfig::returnLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig17returnPerfMetricsE">tensorrt_llm::executor::OutputConfig::returnPerfMetrics (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfigE">tensorrt_llm::executor::ParallelConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv">tensorrt_llm::executor::ParallelConfig::getDeviceIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig21getOrchestratorConfigEv">tensorrt_llm::executor::ParallelConfig::getOrchestratorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv">tensorrt_llm::executor::ParallelConfig::getParticipantIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE">tensorrt_llm::executor::ParallelConfig::mCommMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE">tensorrt_llm::executor::ParallelConfig::mCommType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE">tensorrt_llm::executor::ParallelConfig::mDeviceIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig19mOrchestratorConfigE">tensorrt_llm::executor::ParallelConfig::mOrchestratorConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE">tensorrt_llm::executor::ParallelConfig::mParticipantIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI18OrchestratorConfigEE">tensorrt_llm::executor::ParallelConfig::ParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode">tensorrt_llm::executor::ParallelConfig::setCommunicationMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType">tensorrt_llm::executor::ParallelConfig::setCommunicationType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::ParallelConfig::setDeviceIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig21setOrchestratorConfigERK18OrchestratorConfig">tensorrt_llm::executor::ParallelConfig::setOrchestratorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::ParallelConfig::setParticipantIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE">tensorrt_llm::executor::PeftCacheConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv">tensorrt_llm::executor::PeftCacheConfig::getDeviceCachePercent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::PeftCacheConfig::getHostCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getMaxAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockHost (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv">tensorrt_llm::executor::PeftCacheConfig::getNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumDeviceModuleLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumEnsureWorkers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumHostModuleLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumPutWorkers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getOptimalAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig22kDefaultMaxAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig30kDefaultMaxPagesPerBlockDeviceE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxPagesPerBlockDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig28kDefaultMaxPagesPerBlockHostE">tensorrt_llm::executor::PeftCacheConfig::kDefaultMaxPagesPerBlockHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig26kDefaultOptimalAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::kDefaultOptimalAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE">tensorrt_llm::executor::PeftCacheConfig::mDeviceCachePercent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::PeftCacheConfig::mHostCacheSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mMaxAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE">tensorrt_llm::executor::PeftCacheConfig::mNumCopyStreams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumDeviceModuleLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumEnsureWorkers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumHostModuleLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumPutWorkers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mOptimalAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfigeqERK15PeftCacheConfig">tensorrt_llm::executor::PeftCacheConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt8optionalIfEERKNSt8optionalI6size_tEE">tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE">tensorrt_llm::executor::PhonyNameDueToError::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12PriorityTypeE">tensorrt_llm::executor::PriorityType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE">tensorrt_llm::executor::PromptTuningConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv">tensorrt_llm::executor::PromptTuningConfig::getEmbeddingTable (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig21getInputTokenExtraIdsEv">tensorrt_llm::executor::PromptTuningConfig::getInputTokenExtraIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE">tensorrt_llm::executor::PromptTuningConfig::mEmbeddingTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig19mInputTokenExtraIdsE">tensorrt_llm::executor::PromptTuningConfig::mInputTokenExtraIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6TensorNSt8optionalI16VecTokenExtraIdsEE">tensorrt_llm::executor::PromptTuningConfig::PromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE">tensorrt_llm::executor::RandomSeedType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestE">tensorrt_llm::executor::Request (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request17getAllottedTimeMsEv">tensorrt_llm::executor::Request::getAllottedTimeMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv">tensorrt_llm::executor::Request::getBadWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getClientIdEv">tensorrt_llm::executor::Request::getClientId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getContextPhaseParamsEv">tensorrt_llm::executor::Request::getContextPhaseParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getCrossAttentionMaskEv">tensorrt_llm::executor::Request::getCrossAttentionMask (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getEagleConfigEv">tensorrt_llm::executor::Request::getEagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv">tensorrt_llm::executor::Request::getEmbeddingBias (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputFeaturesEv">tensorrt_llm::executor::Request::getEncoderInputFeatures (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputTokenIdsEv">tensorrt_llm::executor::Request::getEncoderInputTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getEncoderOutputLengthEv">tensorrt_llm::executor::Request::getEncoderOutputLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv">tensorrt_llm::executor::Request::getEndId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request28getExternalDraftTokensConfigEv">tensorrt_llm::executor::Request::getExternalDraftTokensConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request23getGuidedDecodingParamsEv">tensorrt_llm::executor::Request::getGuidedDecodingParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv">tensorrt_llm::executor::Request::getInputTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request25getKvCacheRetentionConfigEv">tensorrt_llm::executor::Request::getKvCacheRetentionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv">tensorrt_llm::executor::Request::getLogitsPostProcessorName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request18getLookaheadConfigEv">tensorrt_llm::executor::Request::getLookaheadConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv">tensorrt_llm::executor::Request::getLoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request15getMaxNewTokensEv">tensorrt_llm::executor::Request::getMaxNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getMaxTokensEv">tensorrt_llm::executor::Request::getMaxTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getMropeConfigEv">tensorrt_llm::executor::Request::getMropeConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getNumReturnSequencesEv">tensorrt_llm::executor::Request::getNumReturnSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv">tensorrt_llm::executor::Request::getOutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv">tensorrt_llm::executor::Request::getPadId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getPositionIdsEv">tensorrt_llm::executor::Request::getPositionIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getPriorityEv">tensorrt_llm::executor::Request::getPriority (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv">tensorrt_llm::executor::Request::getPromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request14getRequestTypeEv">tensorrt_llm::executor::Request::getRequestType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request27getReturnAllGeneratedTokensEv">tensorrt_llm::executor::Request::getReturnAllGeneratedTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv">tensorrt_llm::executor::Request::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request22getSkipCrossAttnBlocksEv">tensorrt_llm::executor::Request::getSkipCrossAttnBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv">tensorrt_llm::executor::Request::getStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv">tensorrt_llm::executor::Request::getStreaming (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request25kBatchedPostProcessorNameE">tensorrt_llm::executor::Request::kBatchedPostProcessorName (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request16kDefaultPriorityE">tensorrt_llm::executor::Request::kDefaultPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request5mImplE">tensorrt_llm::executor::Request::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request">tensorrt_llm::executor::Request::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens10SizeType32bRK14SamplingConfigRK12OutputConfigRKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25ExternalDraftTokensConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI11MropeConfigEENSt8optionalI10LoraConfigEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI22KvCacheRetentionConfigEENSt8optionalINSt6stringEEENSt8optionalI9VecTokensEENSt8optionalI6IdTypeEEb12PriorityType11RequestTypeNSt8optionalI18ContextPhaseParamsEENSt8optionalI6TensorEENSt8optionalI10SizeType32EENSt8optionalI6TensorEE10SizeType32NSt8optionalI11EagleConfigEENSt8optionalI6TensorEENSt8optionalI20GuidedDecodingParamsEENSt8optionalI16MillisecondsTypeEE">tensorrt_llm::executor::Request::Request (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request17setAllottedTimeMsE16MillisecondsType">tensorrt_llm::executor::Request::setAllottedTimeMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setBadWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setClientIdE6IdType">tensorrt_llm::executor::Request::setClientId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setContextPhaseParamsE18ContextPhaseParams">tensorrt_llm::executor::Request::setContextPhaseParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setCrossAttentionMaskE6Tensor">tensorrt_llm::executor::Request::setCrossAttentionMask (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setEagleConfigERKNSt8optionalI11EagleConfigEE">tensorrt_llm::executor::Request::setEagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor">tensorrt_llm::executor::Request::setEmbeddingBias (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputFeaturesE6Tensor">tensorrt_llm::executor::Request::setEncoderInputFeatures (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputTokenIdsERK9VecTokens">tensorrt_llm::executor::Request::setEncoderInputTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setEncoderOutputLengthE10SizeType32">tensorrt_llm::executor::Request::setEncoderOutputLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setEndIdE10SizeType32">tensorrt_llm::executor::Request::setEndId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request28setExternalDraftTokensConfigERK25ExternalDraftTokensConfig">tensorrt_llm::executor::Request::setExternalDraftTokensConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request23setGuidedDecodingParamsERK20GuidedDecodingParams">tensorrt_llm::executor::Request::setGuidedDecodingParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request25setKvCacheRetentionConfigERK22KvCacheRetentionConfig">tensorrt_llm::executor::Request::setKvCacheRetentionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE">tensorrt_llm::executor::Request::setLogitsPostProcessorName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request18setLookaheadConfigERK23LookaheadDecodingConfig">tensorrt_llm::executor::Request::setLookaheadConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig">tensorrt_llm::executor::Request::setLoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setMropeConfigERK11MropeConfig">tensorrt_llm::executor::Request::setMropeConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setNumReturnSequencesE10SizeType32">tensorrt_llm::executor::Request::setNumReturnSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig">tensorrt_llm::executor::Request::setOutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setPadIdE10SizeType32">tensorrt_llm::executor::Request::setPadId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setPositionIdsERKNSt6vectorI10SizeType32EE">tensorrt_llm::executor::Request::setPositionIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setPriorityE12PriorityType">tensorrt_llm::executor::Request::setPriority (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig">tensorrt_llm::executor::Request::setPromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request14setRequestTypeERK11RequestType">tensorrt_llm::executor::Request::setRequestType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request27setReturnAllGeneratedTokensEb">tensorrt_llm::executor::Request::setReturnAllGeneratedTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig">tensorrt_llm::executor::Request::setSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request22setSkipCrossAttnBlocksE6Tensor">tensorrt_llm::executor::Request::setSkipCrossAttnBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb">tensorrt_llm::executor::Request::setStreaming (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestD0Ev">tensorrt_llm::executor::Request::~Request (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetricsE">tensorrt_llm::executor::RequestPerfMetrics (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9firstIterE">tensorrt_llm::executor::RequestPerfMetrics::firstIter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics4iterE">tensorrt_llm::executor::RequestPerfMetrics::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14kvCacheMetricsE">tensorrt_llm::executor::RequestPerfMetrics::kvCacheMetrics (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetricsE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics14kvCacheHitRateE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::kvCacheHitRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numMissedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numMissedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics21numNewAllocatedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numNewAllocatedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numReusedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numReusedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics23numTotalAllocatedBlocksE">tensorrt_llm::executor::RequestPerfMetrics::KvCacheMetrics::numTotalAllocatedBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics8lastIterE">tensorrt_llm::executor::RequestPerfMetrics::lastIter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9TimePointE">tensorrt_llm::executor::RequestPerfMetrics::TimePoint (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13timingMetricsE">tensorrt_llm::executor::RequestPerfMetrics::timingMetrics (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetricsE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics11arrivalTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::arrivalTime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18firstScheduledTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::firstScheduledTime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics14firstTokenTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::firstTokenTime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18kvCacheTransferEndE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::kvCacheTransferEnd (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics20kvCacheTransferStartE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::kvCacheTransferStart (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics13lastTokenTimeE">tensorrt_llm::executor::RequestPerfMetrics::TimingMetrics::lastTokenTime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStageE">tensorrt_llm::executor::RequestStage (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kCONTEXT_IN_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kENCODER_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kENCODER_IN_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE">tensorrt_llm::executor::RequestStage::kGENERATION_COMPLETE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kGENERATION_IN_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE">tensorrt_llm::executor::RequestStage::kQUEUED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStatsE">tensorrt_llm::executor::RequestStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats24allocNewBlocksPerRequestE">tensorrt_llm::executor::RequestStats::allocNewBlocksPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats26allocTotalBlocksPerRequestE">tensorrt_llm::executor::RequestStats::allocTotalBlocksPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats26avgNumDecodedTokensPerIterE">tensorrt_llm::executor::RequestStats::avgNumDecodedTokensPerIter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE">tensorrt_llm::executor::RequestStats::contextPrefillPosition (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats15disServingStatsE">tensorrt_llm::executor::RequestStats::disServingStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats2idE">tensorrt_llm::executor::RequestStats::id (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats24kvCacheHitRatePerRequestE">tensorrt_llm::executor::RequestStats::kvCacheHitRatePerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22missedBlocksPerRequestE">tensorrt_llm::executor::RequestStats::missedBlocksPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE">tensorrt_llm::executor::RequestStats::numGeneratedTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE">tensorrt_llm::executor::RequestStats::paused (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22reusedBlocksPerRequestE">tensorrt_llm::executor::RequestStats::reusedBlocksPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE">tensorrt_llm::executor::RequestStats::scheduled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats5stageE">tensorrt_llm::executor::RequestStats::stage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE">tensorrt_llm::executor::RequestStatsPerIteration (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE">tensorrt_llm::executor::RequestStatsPerIteration::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE">tensorrt_llm::executor::RequestStatsPerIteration::requestStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestTypeE">tensorrt_llm::executor::RequestType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType35REQUEST_TYPE_CONTEXT_AND_GENERATIONE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_CONTEXT_AND_GENERATION (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType25REQUEST_TYPE_CONTEXT_ONLYE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_CONTEXT_ONLY (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11RequestType28REQUEST_TYPE_GENERATION_ONLYE">tensorrt_llm::executor::RequestType::REQUEST_TYPE_GENERATION_ONLY (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseE">tensorrt_llm::executor::Response (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response11getClientIdEv">tensorrt_llm::executor::Response::getClientId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv">tensorrt_llm::executor::Response::getErrorMsg (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv">tensorrt_llm::executor::Response::getRequestId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response9getResultEv">tensorrt_llm::executor::Response::getResult (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv">tensorrt_llm::executor::Response::hasError (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response5mImplE">tensorrt_llm::executor::Response::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response">tensorrt_llm::executor::Response::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6ResultNSt8optionalI6IdTypeEE">tensorrt_llm::executor::Response::Response (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringENSt8optionalI6IdTypeEE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseD0Ev">tensorrt_llm::executor::Response::~Response (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6ResultE">tensorrt_llm::executor::Result (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE">tensorrt_llm::executor::Result::contextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result18contextPhaseParamsE">tensorrt_llm::executor::Result::contextPhaseParams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE">tensorrt_llm::executor::Result::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result12decodingIterE">tensorrt_llm::executor::Result::decodingIter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13encoderOutputE">tensorrt_llm::executor::Result::encoderOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13finishReasonsE">tensorrt_llm::executor::Result::finishReasons (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE">tensorrt_llm::executor::Result::generationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result7isFinalE">tensorrt_llm::executor::Result::isFinal (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result15isSequenceFinalE">tensorrt_llm::executor::Result::isSequenceFinal (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result8logProbsE">tensorrt_llm::executor::Result::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE">tensorrt_llm::executor::Result::outputTokenIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result18requestPerfMetricsE">tensorrt_llm::executor::Result::requestPerfMetrics (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13sequenceIndexE">tensorrt_llm::executor::Result::sequenceIndex (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result21specDecFastLogitsInfoE">tensorrt_llm::executor::Result::specDecFastLogitsInfo (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17RetentionPriorityE">tensorrt_llm::executor::RetentionPriority (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDurationE">tensorrt_llm::executor::RetentionPriorityAndDuration (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration10durationMsE">tensorrt_llm::executor::RetentionPriorityAndDuration::durationMs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration17retentionPriorityE">tensorrt_llm::executor::RetentionPriorityAndDuration::retentionPriority (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration28RetentionPriorityAndDurationERKNSt8optionalI17RetentionPriorityEERKNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::RetentionPriorityAndDuration::RetentionPriorityAndDuration (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfigE">tensorrt_llm::executor::SamplingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig28checkBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkBeamSearchDiversityRate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkBeamWidthE10SizeType32">tensorrt_llm::executor::SamplingConfig::checkBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkMinTokensERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkMinTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::checkNoRepeatNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig23checkNumReturnSequencesERKNSt8optionalI10SizeType32EE10SizeType32">tensorrt_llm::executor::SamplingConfig::checkNumReturnSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkRepetitionPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16checkTemperatureERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopKERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkTopPDecayERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPDecay (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12checkTopPMinERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPMin (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17checkTopPResetIdsERKNSt8optionalI11TokenIdTypeEE">tensorrt_llm::executor::SamplingConfig::checkTopPResetIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv">tensorrt_llm::executor::SamplingConfig::getBeamSearchDiversityRate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv">tensorrt_llm::executor::SamplingConfig::getBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv">tensorrt_llm::executor::SamplingConfig::getEarlyStopping (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv">tensorrt_llm::executor::SamplingConfig::getFrequencyPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv">tensorrt_llm::executor::SamplingConfig::getLengthPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinLengthEv">tensorrt_llm::executor::SamplingConfig::getMinLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinTokensEv">tensorrt_llm::executor::SamplingConfig::getMinTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getNoRepeatNgramSizeEv">tensorrt_llm::executor::SamplingConfig::getNoRepeatNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig17getNumReturnBeamsEv">tensorrt_llm::executor::SamplingConfig::getNumReturnBeams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig21getNumReturnSequencesEv">tensorrt_llm::executor::SamplingConfig::getNumReturnSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv">tensorrt_llm::executor::SamplingConfig::getPresencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig13getRandomSeedEv">tensorrt_llm::executor::SamplingConfig::getRandomSeed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv">tensorrt_llm::executor::SamplingConfig::getRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getSeedEv">tensorrt_llm::executor::SamplingConfig::getSeed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv">tensorrt_llm::executor::SamplingConfig::getTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv">tensorrt_llm::executor::SamplingConfig::getTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv">tensorrt_llm::executor::SamplingConfig::getTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv">tensorrt_llm::executor::SamplingConfig::getTopPDecay (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv">tensorrt_llm::executor::SamplingConfig::getTopPMin (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv">tensorrt_llm::executor::SamplingConfig::getTopPResetIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE">tensorrt_llm::executor::SamplingConfig::mBeamSearchDiversityRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE">tensorrt_llm::executor::SamplingConfig::mBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE">tensorrt_llm::executor::SamplingConfig::mEarlyStopping (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE">tensorrt_llm::executor::SamplingConfig::mFrequencyPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE">tensorrt_llm::executor::SamplingConfig::mLengthPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinTokensE">tensorrt_llm::executor::SamplingConfig::mMinTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18mNoRepeatNgramSizeE">tensorrt_llm::executor::SamplingConfig::mNoRepeatNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig15mNumReturnBeamsE">tensorrt_llm::executor::SamplingConfig::mNumReturnBeams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19mNumReturnSequencesE">tensorrt_llm::executor::SamplingConfig::mNumReturnSequences (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE">tensorrt_llm::executor::SamplingConfig::mPresencePenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE">tensorrt_llm::executor::SamplingConfig::mRepetitionPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mSeedE">tensorrt_llm::executor::SamplingConfig::mSeed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE">tensorrt_llm::executor::SamplingConfig::mTemperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE">tensorrt_llm::executor::SamplingConfig::mTopK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE">tensorrt_llm::executor::SamplingConfig::mTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE">tensorrt_llm::executor::SamplingConfig::mTopPDecay (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE">tensorrt_llm::executor::SamplingConfig::mTopPMin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE">tensorrt_llm::executor::SamplingConfig::mTopPResetIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::executor::SamplingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE10SizeType32RKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI11TokenIdTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::SamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig26setBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setBeamSearchDiversityRate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setBeamWidthE10SizeType32">tensorrt_llm::executor::SamplingConfig::setBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16setEarlyStoppingERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setEarlyStopping (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig19setFrequencyPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setFrequencyPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16setLengthPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setLengthPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setMinLengthERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setMinLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setMinTokensERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setMinTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20setNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setNoRepeatNgramSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig21setNumReturnSequencesERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setNumReturnSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18setPresencePenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setPresencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig13setRandomSeedERKNSt8optionalI14RandomSeedTypeEE">tensorrt_llm::executor::SamplingConfig::setRandomSeed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20setRepetitionPenaltyERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setSeedERKNSt8optionalI14RandomSeedTypeEE">tensorrt_llm::executor::SamplingConfig::setSeed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14setTemperatureERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopKERKNSt8optionalI10SizeType32EE">tensorrt_llm::executor::SamplingConfig::setTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopPERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12setTopPDecayERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPDecay (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10setTopPMinERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPMin (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig15setTopPResetIdsERKNSt8optionalI11TokenIdTypeEE">tensorrt_llm::executor::SamplingConfig::setTopPResetIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig20updateNumReturnBeamsEv">tensorrt_llm::executor::SamplingConfig::updateNumReturnBeams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfigE">tensorrt_llm::executor::SchedulerConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig26getCapacitySchedulerPolicyEv">tensorrt_llm::executor::SchedulerConfig::getCapacitySchedulerPolicy (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig24getContextChunkingPolicyEv">tensorrt_llm::executor::SchedulerConfig::getContextChunkingPolicy (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig21getDynamicBatchConfigEv">tensorrt_llm::executor::SchedulerConfig::getDynamicBatchConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig24mCapacitySchedulerPolicyE">tensorrt_llm::executor::SchedulerConfig::mCapacitySchedulerPolicy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig22mContextChunkingPolicyE">tensorrt_llm::executor::SchedulerConfig::mContextChunkingPolicy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig19mDynamicBatchConfigE">tensorrt_llm::executor::SchedulerConfig::mDynamicBatchConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfigeqERK15SchedulerConfig">tensorrt_llm::executor::SchedulerConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE23CapacitySchedulerPolicyNSt8optionalI21ContextChunkingPolicyEENSt8optionalI18DynamicBatchConfigEE">tensorrt_llm::executor::SchedulerConfig::SchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13SerializationE">tensorrt_llm::executor::Serialization (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization15deserializeBoolERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeBool (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization21deserializeCacheStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeCacheState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeCommStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeCommState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeContextPhaseParamsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeContextPhaseParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeDataTransceiverStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDataTransceiverState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeDebugConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDebugConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeDecodingModeERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeDisServingRequestStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDisServingRequestStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeDynamicBatchConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeDynamicBatchConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeEagleConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeEagleConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeExecutorConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExecutorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization40deserializeExtendedRuntimePerfKnobConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExtendedRuntimePerfKnobConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeExternalDraftTokensConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeExternalDraftTokensConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeGuidedDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingParamsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeGuidedDecodingParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization32deserializeInflightBatchingStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeInflightBatchingStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeIterationStats (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt7istreamE">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization28deserializeIterationStatsVecERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeIterationStatsVec (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization24deserializeKvCacheConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization33deserializeKvCacheRetentionConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheRetentionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeKvCacheStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeKvCacheStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization34deserializeLookaheadDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeLookaheadDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization21deserializeLoraConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeLoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeModelTypeERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeModelType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeMropeConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeMropeConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeOrchestratorConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeOrchestratorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeOutputConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeOutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeParallelConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization26deserializePeftCacheConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializePeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializePromptTuningConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializePromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization18deserializeRequestERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization29deserializeRequestPerfMetricsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestPerfMetrics (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStageERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestStage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeRequestStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeRequestStatsPerIteration (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt7istreamE">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization38deserializeRequestStatsPerIterationVecERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeRequestStatsPerIterationVec (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization19deserializeResponseERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeResponse (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeResponsesERNSt6vectorIcEE">tensorrt_llm::executor::Serialization::deserializeResponses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeResultERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeResult (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization25deserializeSamplingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization26deserializeSchedulerConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization22deserializeSocketStateERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSocketState (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization32deserializeSpecDecFastLogitsInfoERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSpecDecFastLogitsInfo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeSpeculativeDecodingConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeSpeculativeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization30deserializeStaticBatchingStatsERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeStaticBatchingStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeStringERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization17deserializeTensorERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization20deserializeTimePointERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTimePoint (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization36deserializeTokenRangeRetentionConfigERNSt7istreamE">tensorrt_llm::executor::Serialization::deserializeTokenRangeRetentionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK10LoraConfigRNSt7ostreamE">tensorrt_llm::executor::Serialization::serialize (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11DebugConfigRNSt7ostreamE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11EagleConfigRNSt7ostreamE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11MropeConfigRNSt7ostreamE">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12DecodingModeRNSt7ostreamE">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12KvCacheStatsRNSt7ostreamE">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12OutputConfigRNSt7ostreamE">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStageRNSt7ostreamE">[7]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStatsRNSt7ostreamE">[8]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK13KvCacheConfigRNSt7ostreamE">[9]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14DecodingConfigRNSt7ostreamE">[10]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ExecutorConfigRNSt7ostreamE">[11]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStats">[12]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStatsRNSt7ostreamE">[13]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ParallelConfigRNSt7ostreamE">[14]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14SamplingConfigRNSt7ostreamE">[15]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15PeftCacheConfigRNSt7ostreamE">[16]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15SchedulerConfigRNSt7ostreamE">[17]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18ContextPhaseParamsRNSt7ostreamE">[18]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18DynamicBatchConfigRNSt7ostreamE">[19]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18OrchestratorConfigRNSt7ostreamE">[20]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18PromptTuningConfigRNSt7ostreamE">[21]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18RequestPerfMetricsRNSt7ostreamE">[22]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK19StaticBatchingStatsRNSt7ostreamE">[23]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20DataTransceiverStateRNSt7ostreamE">[24]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingConfigRNSt7ostreamE">[25]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingParamsRNSt7ostreamE">[26]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK21InflightBatchingStatsRNSt7ostreamE">[27]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22DisServingRequestStatsRNSt7ostreamE">[28]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22KvCacheRetentionConfigRNSt7ostreamE">[29]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK23LookaheadDecodingConfigRNSt7ostreamE">[30]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIteration">[31]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIterationRNSt7ostreamE">[32]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25ExternalDraftTokensConfigRNSt7ostreamE">[33]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25SpeculativeDecodingConfigRNSt7ostreamE">[34]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK29ExtendedRuntimePerfKnobConfigRNSt7ostreamE">[35]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK33SpeculativeDecodingFastLogitsInfoRNSt7ostreamE">[36]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6ResultRNSt7ostreamE">[37]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6TensorRNSt7ostreamE">[38]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK7RequestRNSt7ostreamE">[39]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK8ResponseRNSt7ostreamE">[40]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN18RequestPerfMetrics9TimePointERNSt7ostreamE">[41]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigERNSt7ostreamE">[42]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache10CacheStateERNSt7ostreamE">[43]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache11SocketStateERNSt7ostreamE">[44]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache9CommStateERNSt7ostreamE">[45]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI14IterationStatsEE">[46]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI24RequestStatsPerIterationEE">[47]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI8ResponseEE">[48]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK10LoraConfig">tensorrt_llm::executor::Serialization::serializedSize (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11DebugConfig">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11EagleConfig">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11MropeConfig">[3]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12DecodingMode">[4]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12KvCacheStats">[5]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12OutputConfig">[6]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStage">[7]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStats">[8]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK13KvCacheConfig">[9]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14DecodingConfig">[10]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ExecutorConfig">[11]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14IterationStats">[12]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ParallelConfig">[13]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14SamplingConfig">[14]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15PeftCacheConfig">[15]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15SchedulerConfig">[16]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18ContextPhaseParams">[17]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18DynamicBatchConfig">[18]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18OrchestratorConfig">[19]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18PromptTuningConfig">[20]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18RequestPerfMetrics">[21]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK19StaticBatchingStats">[22]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20DataTransceiverState">[23]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingConfig">[24]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingParams">[25]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK21InflightBatchingStats">[26]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22DisServingRequestStats">[27]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22KvCacheRetentionConfig">[28]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK23LookaheadDecodingConfig">[29]</a>, <a 
href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK24RequestStatsPerIteration">[30]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25ExternalDraftTokensConfig">[31]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25SpeculativeDecodingConfig">[32]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK29ExtendedRuntimePerfKnobConfig">[33]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK33SpeculativeDecodingFastLogitsInfo">[34]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Result">[35]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Tensor">[36]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK7Request">[37]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK8Response">[38]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN18RequestPerfMetrics9TimePointE">[39]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigE">[40]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache10CacheStateE">[41]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache11SocketStateE">[42]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache9CommStateE">[43]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5ShapeE">tensorrt_llm::executor::Shape (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape4BaseE">tensorrt_llm::executor::Shape::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape9DimType64E">tensorrt_llm::executor::Shape::DimType64 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI9DimType64EE">tensorrt_llm::executor::Shape::Shape (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK9DimType64N4Base9size_typeE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10SizeType32E">tensorrt_llm::executor::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE">tensorrt_llm::executor::SpeculativeDecodingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig10fastLogitsE">tensorrt_llm::executor::SpeculativeDecodingConfig::fastLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfigeqERK25SpeculativeDecodingConfig">tensorrt_llm::executor::SpeculativeDecodingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigEb">tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfoE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo18draftParticipantIdE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::draftParticipantId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo14draftRequestIdE">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::draftRequestId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo8toTensorEv">tensorrt_llm::executor::SpeculativeDecodingFastLogitsInfo::toTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE">tensorrt_llm::executor::StaticBatchingStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE">tensorrt_llm::executor::StaticBatchingStats::emptyGenSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE">tensorrt_llm::executor::StaticBatchingStats::numContextRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE">tensorrt_llm::executor::StaticBatchingStats::numCtxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE">tensorrt_llm::executor::StaticBatchingStats::numGenTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::StaticBatchingStats::numScheduledRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9StreamPtrE">tensorrt_llm::executor::StreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorE">tensorrt_llm::executor::Tensor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr">tensorrt_llm::executor::Tensor::copyTo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToGpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToManaged (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPooledPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape">tensorrt_llm::executor::Tensor::cpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::detail::ofITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor">tensorrt_llm::executor::Tensor::detail::toITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv">tensorrt_llm::executor::Tensor::getData (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv">tensorrt_llm::executor::Tensor::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv">tensorrt_llm::executor::Tensor::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev">tensorrt_llm::executor::Tensor::getRuntimeType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv">tensorrt_llm::executor::Tensor::getShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv">tensorrt_llm::executor::Tensor::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv">tensorrt_llm::executor::Tensor::getSizeInBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape">tensorrt_llm::executor::Tensor::gpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor4ImplE">tensorrt_llm::executor::Tensor::Impl (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape">tensorrt_llm::executor::Tensor::managed (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE">tensorrt_llm::executor::Tensor::mTensor (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape">tensorrt_llm::executor::Tensor::of (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorcvbEv">tensorrt_llm::executor::Tensor::operator bool (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor">tensorrt_llm::executor::Tensor::operator!= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor">tensorrt_llm::executor::Tensor::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor">tensorrt_llm::executor::Tensor::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pooledPinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr">tensorrt_llm::executor::Tensor::setFrom (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr">tensorrt_llm::executor::Tensor::setZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::Tensor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorD0Ev">tensorrt_llm::executor::Tensor::~Tensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9TensorPtrE">tensorrt_llm::executor::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11TokenIdTypeE">tensorrt_llm::executor::TokenIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE">tensorrt_llm::executor::TypeTraits (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE">tensorrt_llm::executor::TypeTraits&lt;bool&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE">tensorrt_llm::executor::TypeTraits&lt;bool&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE">tensorrt_llm::executor::TypeTraits&lt;float&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE">tensorrt_llm::executor::TypeTraits&lt;float&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE">tensorrt_llm::executor::TypeTraits&lt;half&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE">tensorrt_llm::executor::TypeTraits&lt;half&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE">tensorrt_llm::executor::TypeTraits&lt;T*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE">tensorrt_llm::executor::TypeTraits&lt;T*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11VecLogProbsE">tensorrt_llm::executor::VecLogProbs (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor16VecTokenExtraIdsE">tensorrt_llm::executor::VecTokenExtraIds (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9VecTokensE">tensorrt_llm::executor::VecTokens (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7versionEv">tensorrt_llm::executor::version (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm6layersE">tensorrt_llm::layers (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm3mpiE">tensorrt_llm::mpi (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">tensorrt_llm::runtime (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[25]</a>, <a 
href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[28]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[29]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[30]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[31]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[32]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[33]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[34]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[35]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[36]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[37]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffersE">tensorrt_llm::runtime::AllReduceBuffers (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers16AllReduceBuffersE10SizeType3210SizeType3210SizeType3210SizeType32RK13BufferManagerRK11WorldConfigKb">tensorrt_llm::runtime::AllReduceBuffers::AllReduceBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers18mAllReduceCommPtrsE">tensorrt_llm::runtime::AllReduceBuffers::mAllReduceCommPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers17mIpcMemoryHandlesE">tensorrt_llm::runtime::AllReduceBuffers::mIpcMemoryHandles (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers9TensorPtrE">tensorrt_llm::runtime::AllReduceBuffers::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer">tensorrt_llm::runtime::bufferCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7IBuffer9SharedPtrE">tensorrt_llm::runtime::bufferCastOrNull (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7ITensor9SharedPtrE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7IBuffer9SharedPtrEEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7ITensor9SharedPtrEEE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7IBuffer14SharedConstPtrE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7ITensor14SharedConstPtrE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7IBuffer14SharedConstPtrEEE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7ITensor14SharedConstPtrEEE">[7]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE">tensorrt_llm::runtime::BufferDataType (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb">tensorrt_llm::runtime::BufferDataType::BufferDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv">tensorrt_llm::runtime::BufferDataType::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv">tensorrt_llm::runtime::BufferDataType::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv">tensorrt_llm::runtime::BufferDataType::isPointer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv">tensorrt_llm::runtime::BufferDataType::isUnsigned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE">tensorrt_llm::runtime::BufferDataType::kTrtPointerType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE">tensorrt_llm::runtime::BufferDataType::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE">tensorrt_llm::runtime::BufferDataType::mPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE">tensorrt_llm::runtime::BufferDataType::mUnsigned (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv">tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerE">tensorrt_llm::runtime::BufferManager (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb">tensorrt_llm::runtime::BufferManager::BufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer">tensorrt_llm::runtime::BufferManager::copy (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType">tensorrt_llm::runtime::BufferManager::copyFrom (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::cpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager14CudaMemPoolPtrE">tensorrt_llm::runtime::BufferManager::CudaMemPoolPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE">tensorrt_llm::runtime::BufferManager::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyBuffer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv">tensorrt_llm::runtime::BufferManager::getStream (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpuSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE">tensorrt_llm::runtime::BufferManager::IBufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE">tensorrt_llm::runtime::BufferManager::ITensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE">tensorrt_llm::runtime::BufferManager::kBYTE_TYPE (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::managed (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv">tensorrt_llm::runtime::BufferManager::memoryPoolFree (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv">tensorrt_llm::runtime::BufferManager::memoryPoolReserved (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE">tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv">tensorrt_llm::runtime::BufferManager::memoryPoolUsed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager5mPoolE">tensorrt_llm::runtime::BufferManager::mPool (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE">tensorrt_llm::runtime::BufferManager::mStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE">tensorrt_llm::runtime::BufferManager::mTrimPool (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinned (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinnedPool (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t">tensorrt_llm::runtime::BufferManager::setMem (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer">tensorrt_llm::runtime::BufferManager::setZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev">tensorrt_llm::runtime::BufferManager::~BufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE">tensorrt_llm::runtime::BufferRange (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE">tensorrt_llm::runtime::BufferRange::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI1UEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeERK7IBuffer">tensorrt_llm::runtime::BufferRange::BufferRange (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tIXntNSt10is_const_vI1UEEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE">tensorrt_llm::runtime::constPointerCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEventE">tensorrt_llm::runtime::CudaEvent (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb">tensorrt_llm::runtime::CudaEvent::CudaEvent (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE">tensorrt_llm::runtime::CudaEvent::Deleter (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb">tensorrt_llm::runtime::CudaEvent::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE">tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer">tensorrt_llm::runtime::CudaEvent::Deleter::operator() (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE">tensorrt_llm::runtime::CudaEvent::element_type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE">tensorrt_llm::runtime::CudaEvent::EventPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv">tensorrt_llm::runtime::CudaEvent::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE">tensorrt_llm::runtime::CudaEvent::mEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE">tensorrt_llm::runtime::CudaEvent::pointer (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv">tensorrt_llm::runtime::CudaEvent::synchronize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStreamE">tensorrt_llm::runtime::CudaStream (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t">tensorrt_llm::runtime::CudaStream::CudaStream (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE">tensorrt_llm::runtime::CudaStream::Deleter (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb">tensorrt_llm::runtime::CudaStream::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE">tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t">tensorrt_llm::runtime::CudaStream::Deleter::operator() (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv">tensorrt_llm::runtime::CudaStream::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv">tensorrt_llm::runtime::CudaStream::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE">tensorrt_llm::runtime::CudaStream::mDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE">tensorrt_llm::runtime::CudaStream::mStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::record (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE">tensorrt_llm::runtime::CudaStream::StreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv">tensorrt_llm::runtime::CudaStream::synchronize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::wait (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE">tensorrt_llm::runtime::DataTypeTraits (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoderE">tensorrt_llm::runtime::decoder (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5InputE">tensorrt_llm::runtime::decoder::Input (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE">tensorrt_llm::runtime::decoder::Input::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr">tensorrt_llm::runtime::decoder::Input::Input (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE">tensorrt_llm::runtime::decoder::Input::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE">tensorrt_llm::runtime::decoder::Input::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6OutputE">tensorrt_llm::runtime::decoder::Output (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE">tensorrt_llm::runtime::decoder::Output::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv">tensorrt_llm::runtime::decoder::Output::Output (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE">tensorrt_llm::runtime::decoder::Output::sequenceLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE">tensorrt_llm::runtime::decoder::Output::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batchE">tensorrt_llm::runtime::decoder_batch (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batchE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch20DecoderFinishedEventE">tensorrt_llm::runtime::decoder_batch::DecoderFinishedEvent (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch20DecoderFinishedEvent6activeE">tensorrt_llm::runtime::decoder_batch::DecoderFinishedEvent::active (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch20DecoderFinishedEvent20DecoderFinishedEventERR9CudaEventRKNSt6vectorIbEE">tensorrt_llm::runtime::decoder_batch::DecoderFinishedEvent::DecoderFinishedEvent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch20DecoderFinishedEvent5eventE">tensorrt_llm::runtime::decoder_batch::DecoderFinishedEvent::event (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE">tensorrt_llm::runtime::decoder_batch::Input (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE">tensorrt_llm::runtime::decoder_batch::Input::active (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE">tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input11eagleInputsE">tensorrt_llm::runtime::decoder_batch::Input::eagleInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input15eagleLastInputsE">tensorrt_llm::runtime::decoder_batch::Input::eagleLastInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input25explicitDraftTokensInputsE">tensorrt_llm::runtime::decoder_batch::Input::explicitDraftTokensInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input29explicitDraftTokensLastInputsE">tensorrt_llm::runtime::decoder_batch::Input::explicitDraftTokensLastInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE">tensorrt_llm::runtime::decoder_batch::Input::Input (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE">tensorrt_llm::runtime::decoder_batch::Input::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input20predictedDraftLogitsE">tensorrt_llm::runtime::decoder_batch::Input::predictedDraftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input8seqSlotsE">tensorrt_llm::runtime::decoder_batch::Input::seqSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE">tensorrt_llm::runtime::decoder_batch::Output (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE">tensorrt_llm::runtime::decoder_batch::Request (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE">tensorrt_llm::runtime::decoder_batch::Request::badWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE">tensorrt_llm::runtime::decoder_batch::Request::BufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE">tensorrt_llm::runtime::decoder_batch::Request::draftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE">tensorrt_llm::runtime::decoder_batch::Request::draftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5dtypeE">tensorrt_llm::runtime::decoder_batch::Request::dtype (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11eagleConfigE">tensorrt_llm::runtime::decoder_batch::Request::eagleConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE">tensorrt_llm::runtime::decoder_batch::Request::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE">tensorrt_llm::runtime::decoder_batch::Request::endId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request28generatedTokensPerEngineStepE">tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerEngineStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE">tensorrt_llm::runtime::decoder_batch::Request::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE">tensorrt_llm::runtime::decoder_batch::Request::inputLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request22lookaheadRuntimeConfigE">tensorrt_llm::runtime::decoder_batch::Request::lookaheadRuntimeConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE">tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11medusaPathsE">tensorrt_llm::runtime::decoder_batch::Request::medusaPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13medusaTreeIdsE">tensorrt_llm::runtime::decoder_batch::Request::medusaTreeIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14TensorConstPtr10SizeType32NSt8optionalI10SizeType32EENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::decoder_batch::Request::Request (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE">tensorrt_llm::runtime::decoder_batch::Request::stopWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14TensorConstPtrE">tensorrt_llm::runtime::decoder_batch::Request::TensorConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE">tensorrt_llm::runtime::decoder_batch::Request::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInputE">tensorrt_llm::runtime::DecodingInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE">tensorrt_llm::runtime::DecodingInput::badWordsLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13badWordsListsE">tensorrt_llm::runtime::DecodingInput::badWordsLists (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE">tensorrt_llm::runtime::DecodingInput::badWordsPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE">tensorrt_llm::runtime::DecodingInput::batchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE">tensorrt_llm::runtime::DecodingInput::batchSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE">tensorrt_llm::runtime::DecodingInput::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE10SizeType3210SizeType3210SizeType3210SizeType3214TensorConstPtr9TensorPtr14TensorConstPtr">tensorrt_llm::runtime::DecodingInput::DecodingInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11eagleInputsE">tensorrt_llm::runtime::DecodingInput::eagleInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputsE">tensorrt_llm::runtime::DecodingInput::EagleInputs (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs12acceptedLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15acceptedPathIdsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedPathIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14acceptedTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::acceptedTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs24chunkedContextNextTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::chunkedContextNextTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs11EagleInputsE14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr14TensorConstPtr">tensorrt_llm::runtime::DecodingInput::EagleInputs::EagleInputs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13lastDraftLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14lastDraftPathsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15lastDraftTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::lastDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13nextDraftLensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14nextDraftPathsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingInput::EagleInputs::nextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs8seqSlotsE">tensorrt_llm::runtime::DecodingInput::EagleInputs::seqSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE">tensorrt_llm::runtime::DecodingInput::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE">tensorrt_llm::runtime::DecodingInput::endIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25explicitDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::explicitDraftTokensInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::bestPathIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::bestPathLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16lastDraftIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastDraftIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15lastDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21lastGenerationLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs19lastPositionIdsBaseE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::lastPositionIdsBase (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs5masksE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::masks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs18maxGenLengthDeviceE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::maxGenLengthDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16nextDraftIndicesE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextDraftProbsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextFlatTokensE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextFlatTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21nextGenerationLengthsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::nextGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs17packedPositionIdsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::packedPositionIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs8seqSlotsE">tensorrt_llm::runtime::DecodingInput::ExplicitDraftTokensInputs::seqSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25externalDraftTokensInputsE">tensorrt_llm::runtime::DecodingInput::externalDraftTokensInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs17constantThresholdE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::constantThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11draftLogitsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs10draftProbsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs13draftTokenIdsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::draftTokenIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14numDraftTokensE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::numDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs4stepE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::step (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11targetProbsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::targetProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14useDraftLogitsE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useDraftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs18useDraftLogitsHostE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useDraftLogitsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs28useRandomAcceptanceThresholdE">tensorrt_llm::runtime::DecodingInput::ExternalDraftTokensInputs::useRandomAcceptanceThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13finishReasonsE">tensorrt_llm::runtime::DecodingInput::finishReasons (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE">tensorrt_llm::runtime::DecodingInput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE">tensorrt_llm::runtime::DecodingInput::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15lookaheadInputsE">tensorrt_llm::runtime::DecodingInput::lookaheadInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputsE">tensorrt_llm::runtime::DecodingInput::LookaheadInputs (C++ struct)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputs13tokensPerStepE">tensorrt_llm::runtime::DecodingInput::LookaheadInputs::tokensPerStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE">tensorrt_llm::runtime::DecodingInput::maxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE">tensorrt_llm::runtime::DecodingInput::maxBadWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE">tensorrt_llm::runtime::DecodingInput::maxLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE">tensorrt_llm::runtime::DecodingInput::maxStopWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE">tensorrt_llm::runtime::DecodingInput::medusaInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaCurTokensPerStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTargetTokensPerStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTreeIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE">tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE">tensorrt_llm::runtime::DecodingInput::sequenceLimitLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE">tensorrt_llm::runtime::DecodingInput::sinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE">tensorrt_llm::runtime::DecodingInput::step (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE">tensorrt_llm::runtime::DecodingInput::stopWordsLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14stopWordsListsE">tensorrt_llm::runtime::DecodingInput::stopWordsLists (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE">tensorrt_llm::runtime::DecodingInput::stopWordsPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14TensorConstPtrE">tensorrt_llm::runtime::DecodingInput::TensorConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE">tensorrt_llm::runtime::DecodingInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutputE">tensorrt_llm::runtime::DecodingOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE">tensorrt_llm::runtime::DecodingOutput::beamHypotheses (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses10batchDonesE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::batchDones (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses14cumLogProbsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbsCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11logProbsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbsCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18minNormedScoresCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScoresCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15normedScoresCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScoresCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11numBeamsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeamsCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsCBAE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsCBA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE10SizeType3210SizeType32">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE">tensorrt_llm::runtime::DecodingOutput::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE">tensorrt_llm::runtime::DecodingOutput::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr9TensorPtr">tensorrt_llm::runtime::DecodingOutput::DecodingOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput12eagleBuffersE">tensorrt_llm::runtime::DecodingOutput::eagleBuffers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26explicitDraftTokensBuffersE">tensorrt_llm::runtime::DecodingOutput::explicitDraftTokensBuffers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE">tensorrt_llm::runtime::DecodingOutput::finishedSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13finishReasonsE">tensorrt_llm::runtime::DecodingOutput::finishReasons (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11gatheredIdsE">tensorrt_llm::runtime::DecodingOutput::gatheredIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE">tensorrt_llm::runtime::DecodingOutput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE">tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE">tensorrt_llm::runtime::DecodingOutput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE">tensorrt_llm::runtime::DecodingOutput::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13logProbsTiledE">tensorrt_llm::runtime::DecodingOutput::logProbsTiled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput16lookaheadOutputsE">tensorrt_llm::runtime::DecodingOutput::lookaheadOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE">tensorrt_llm::runtime::DecodingOutput::newTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE">tensorrt_llm::runtime::DecodingOutput::newTokensSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE">tensorrt_llm::runtime::DecodingOutput::newTokensVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE">tensorrt_llm::runtime::DecodingOutput::parentIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputsE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26speculativeDecodingOutputsE">tensorrt_llm::runtime::DecodingOutput::speculativeDecodingOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs21acceptedLengthsCumSumE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::acceptedLengthsCumSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs17acceptedTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::acceptedTokensLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs15nextDraftTokensE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::nextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18nextDraftTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::nextDraftTokensLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs12pathsOffsetsE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::pathsOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18prevDraftTokensLenE">tensorrt_llm::runtime::DecodingOutput::SpeculativeDecodingOutputs::prevDraftTokensLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE">tensorrt_llm::runtime::DecodingOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffersE">tensorrt_llm::runtime::EagleBuffers (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9BufferPtrE">tensorrt_llm::runtime::EagleBuffers::BufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers28chunkedContextNextTokensHostE">tensorrt_llm::runtime::EagleBuffers::chunkedContextNextTokensHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers23cumSumGenerationLengthsE">tensorrt_llm::runtime::EagleBuffers::cumSumGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers12EagleBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN8executor14DecodingConfigERKN7runtime11TllmRuntimeE">tensorrt_llm::runtime::EagleBuffers::EagleBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers12engineInputsE">tensorrt_llm::runtime::EagleBuffers::engineInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13engineOutputsE">tensorrt_llm::runtime::EagleBuffers::engineOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs12acceptedLensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13acceptedPathsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14acceptedTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::acceptedTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs24chunkedContextNextTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::chunkedContextNextTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13nextDraftLensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14nextDraftPathsE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs15nextDraftTokensE">tensorrt_llm::runtime::EagleBuffers::EngineOutputs::nextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers18greedySamplingHostE">tensorrt_llm::runtime::EagleBuffers::greedySamplingHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6InputsE">tensorrt_llm::runtime::EagleBuffers::Inputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs24chunkedContextNextTokensE">tensorrt_llm::runtime::EagleBuffers::Inputs::chunkedContextNextTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs6createE10SizeType32RKN7runtime11TllmRuntimeERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">tensorrt_llm::runtime::EagleBuffers::Inputs::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs9draftLensE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs10draftPathsE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs11draftTokensE">tensorrt_llm::runtime::EagleBuffers::Inputs::draftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetCtxContextLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxContextLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetCtxPastKeyValueLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxPastKeyValueLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetCtxRequestTypesHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetCtxRequestTypesHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetGenContextLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenContextLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetGenPastKeyValueLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenPastKeyValueLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetGenRequestTypesHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::eagleNetGenRequestTypesHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18inputGenTokensHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::inputGenTokensHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs14posteriorAlphaE">tensorrt_llm::runtime::EagleBuffers::Inputs::posteriorAlpha (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18posteriorThresholdE">tensorrt_llm::runtime::EagleBuffers::Inputs::posteriorThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs16randomDataSampleE">tensorrt_llm::runtime::EagleBuffers::Inputs::randomDataSample (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs20randomDataValidationE">tensorrt_llm::runtime::EagleBuffers::Inputs::randomDataValidation (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29specDecodingGenerationLengthsE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs33specDecodingGenerationLengthsHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingGenerationLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs23specDecodingPackedMasksE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingPackedMasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27specDecodingPositionOffsetsE">tensorrt_llm::runtime::EagleBuffers::Inputs::specDecodingPositionOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs12temperaturesE">tensorrt_llm::runtime::EagleBuffers::Inputs::temperatures (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18useDynamicTreeHostE">tensorrt_llm::runtime::EagleBuffers::Inputs::useDynamicTreeHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12EagleBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE">tensorrt_llm::runtime::EagleBuffers::insertInputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers7ITensorE">tensorrt_llm::runtime::EagleBuffers::ITensor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13LlmRequestPtrE">tensorrt_llm::runtime::EagleBuffers::LlmRequestPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers19maxGenerationLengthE">tensorrt_llm::runtime::EagleBuffers::maxGenerationLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers26mDefaultPosteriorThresholdE">tensorrt_llm::runtime::EagleBuffers::mDefaultPosteriorThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers17mDoGreedySamplingE">tensorrt_llm::runtime::EagleBuffers::mDoGreedySampling (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers18posteriorAlphaHostE">tensorrt_llm::runtime::EagleBuffers::posteriorAlphaHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers22posteriorThresholdHostE">tensorrt_llm::runtime::EagleBuffers::posteriorThresholdHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers22reduceTempStorageBytesE">tensorrt_llm::runtime::EagleBuffers::reduceTempStorageBytes (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers13RequestVectorE">tensorrt_llm::runtime::EagleBuffers::RequestVector (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE">tensorrt_llm::runtime::EagleBuffers::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers21scanReduceTempStorageE">tensorrt_llm::runtime::EagleBuffers::scanReduceTempStorage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers20scanTempStorageBytesE">tensorrt_llm::runtime::EagleBuffers::scanTempStorageBytes (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime12EagleBuffers13setFromInputsEvRK13RequestVectorRK13RequestVector10SizeType32RK7ITensorRKN12EagleBuffers6InputsERKN7runtime11EagleModuleERKN7runtime13BufferManagerE">tensorrt_llm::runtime::EagleBuffers::setFromInputs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12EagleBuffers13setFromInputsERK13RequestVectorRK13RequestVectorRKN7runtime7ITensorERK7ITensorRKN12EagleBuffers6InputsERKN7runtime11TllmRuntimeERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers10SizeType32E">tensorrt_llm::runtime::EagleBuffers::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorMapE">tensorrt_llm::runtime::EagleBuffers::TensorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorPtrE">tensorrt_llm::runtime::EagleBuffers::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffersE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9BufferPtrE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::BufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers23cumSumGenerationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::cumSumGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12engineInputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::engineInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs15positionOffsetsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs::positionOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs18requestTypesDeviceE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineInputs::requestTypesDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13engineOutputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::engineOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::bestPathIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::bestPathLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs5masksE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::masks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs11maxGenTokenE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::maxGenToken (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs16nextDraftIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextDraftProbsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15nextDraftTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextFlatTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextFlatTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs21nextGenerationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextGenerationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs19nextPositionOffsetsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::nextPositionOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs17packedPositionIdsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::packedPositionIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs13totalGenTokenE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::EngineOutputs::totalGenToken (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers26ExplicitDraftTokensBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN8executor14DecodingConfigERKN7runtime11TllmRuntimeE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::ExplicitDraftTokensBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6InputsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs6createE10SizeType32RKN7runtime11TllmRuntimeERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12draftIndicesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftIndices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs10draftProbsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11draftTokensE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::draftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs17generationLengthsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::generationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs21generationLengthsHostE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::generationLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16maxGenLengthHostE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::maxGenLengthHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11packedMasksE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::packedMasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11positionIdsE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::positionIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs15positionIdsBaseE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::positionIdsBase (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16randomDataSampleE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::randomDataSample (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs20randomDataValidationE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::randomDataValidation (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12temperaturesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::Inputs::temperatures (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::insertInputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7ITensorE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::ITensor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers15scanTempStorageE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::scanTempStorage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers20scanTempStorageBytesE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::scanTempStorageBytes (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsEv10SizeType3210SizeType3210SizeType32RK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime25ExplicitDraftTokensModuleERKN7runtime10CudaStreamE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::setFromInputs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsE10SizeType3210SizeType32RKN7runtime7ITensorERK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime11TllmRuntimeERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers10SizeType32E">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorMapE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::TensorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorPtrE">tensorrt_llm::runtime::ExplicitDraftTokensBuffers::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInputE">tensorrt_llm::runtime::GenerationInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE">tensorrt_llm::runtime::GenerationInput::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK10SizeType32K10SizeType329TensorPtr9TensorPtrb">tensorrt_llm::runtime::GenerationInput::GenerationInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE">tensorrt_llm::runtime::GenerationInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutputE">tensorrt_llm::runtime::GenerationOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE">tensorrt_llm::runtime::GenerationOutput::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenerationOutput::GenerationOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE">tensorrt_llm::runtime::GenerationOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE">tensorrt_llm::runtime::GenericGenerationInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE">tensorrt_llm::runtime::GenericGenerationInput::badWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE">tensorrt_llm::runtime::GenericGenerationInput::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE">tensorrt_llm::runtime::GenericGenerationInput::endId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK10SizeType32K10SizeType329TensorPtr9TensorPtrb">tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE">tensorrt_llm::runtime::GenericGenerationInput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE">tensorrt_llm::runtime::GenericGenerationInput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE">tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE">tensorrt_llm::runtime::GenericGenerationInput::packed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE">tensorrt_llm::runtime::GenericGenerationInput::padId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE">tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE">tensorrt_llm::runtime::GenericGenerationInput::stopWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE">tensorrt_llm::runtime::GenericGenerationInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE">tensorrt_llm::runtime::GenericGenerationOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE">tensorrt_llm::runtime::GenericGenerationOutput::Callback (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE">tensorrt_llm::runtime::GenericGenerationOutput::contextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE">tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE">tensorrt_llm::runtime::GenericGenerationOutput::generationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE">tensorrt_llm::runtime::GenericGenerationOutput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE">tensorrt_llm::runtime::GenericGenerationOutput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE">tensorrt_llm::runtime::GenericGenerationOutput::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE">tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE">tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE">tensorrt_llm::runtime::GenericPromptTuningParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE">tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE">tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams10SizeType32E">tensorrt_llm::runtime::GenericPromptTuningParams::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE">tensorrt_llm::runtime::GenericPromptTuningParams::tasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE">tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE">tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20getDefaultBatchSlotsEN7runtime10SizeType32E">tensorrt_llm::runtime::getDefaultBatchSlots (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE">tensorrt_llm::runtime::GptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoder::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forwardSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::GptDecoder::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERKN8executor12DecodingModeE6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt10shared_ptrIK25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::GptDecoder::GptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder23mDecodingLayerWorkspaceE">tensorrt_llm::runtime::GptDecoder::mDecodingLayerWorkspace (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13mDecodingModeE">tensorrt_llm::runtime::GptDecoder::mDecodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE">tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE">tensorrt_llm::runtime::GptDecoder::mManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13mMaxBatchSizeE">tensorrt_llm::runtime::GptDecoder::mMaxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE">tensorrt_llm::runtime::GptDecoder::mSamplingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEERKNSt8optionalIKNSt6vectorIN13decoder_batch7RequestEEEEE">tensorrt_llm::runtime::GptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE">tensorrt_llm::runtime::GptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatchedE">tensorrt_llm::runtime::GptDecoderBatched (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched34allocateSpeculativeDecodingBuffersEN8nvinfer18DataTypeE">tensorrt_llm::runtime::GptDecoderBatched::allocateSpeculativeDecodingBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoderBatched::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16DecodingInputPtrE">tensorrt_llm::runtime::GptDecoderBatched::DecodingInputPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched17DecodingOutputPtrE">tensorrt_llm::runtime::GptDecoderBatched::DecodingOutputPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched8finalizeE10SizeType32RK14SamplingConfigb">tensorrt_llm::runtime::GptDecoderBatched::finalize (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched8finalizeERK14SamplingConfig">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::forwardAsync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched12forwardAsyncERN7decoder6OutputERKN7decoder5InputE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14forwardDecoderE10SizeType32RN13decoder_batch6OutputERKN13decoder_batch5InputE11ForwardType">tensorrt_llm::runtime::GptDecoderBatched::forwardDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched15forwardDispatchERN13decoder_batch6OutputERKN13decoder_batch5InputE11ForwardType">tensorrt_llm::runtime::GptDecoderBatched::forwardDispatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11forwardSyncERKN13decoder_batch20DecoderFinishedEventE">tensorrt_llm::runtime::GptDecoderBatched::forwardSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11forwardSyncERKN13decoder_batch20DecoderFinishedEventERN13decoder_batch6OutputERKN13decoder_batch5InputE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11forwardSyncEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11ForwardTypeE">tensorrt_llm::runtime::GptDecoderBatched::ForwardType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11ForwardType6kASYNCE">tensorrt_llm::runtime::GptDecoderBatched::ForwardType::kASYNC (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11ForwardType5kSYNCE">tensorrt_llm::runtime::GptDecoderBatched::ForwardType::kSYNC (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched24getAcceptedLengthsCumSumEv">tensorrt_llm::runtime::GptDecoderBatched::getAcceptedLengthsCumSum (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched22getAcceptedPackedPathsEv">tensorrt_llm::runtime::GptDecoderBatched::getAcceptedPackedPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched15getAllNewTokensEv">tensorrt_llm::runtime::GptDecoderBatched::getAllNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched14getCumLogProbsE10SizeType32">tensorrt_llm::runtime::GptDecoderBatched::getCumLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched14getCumLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched15getDecodingModeEv">tensorrt_llm::runtime::GptDecoderBatched::getDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched11getFinishedEv">tensorrt_llm::runtime::GptDecoderBatched::getFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched16getFinishReasonsEv">tensorrt_llm::runtime::GptDecoderBatched::getFinishReasons (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched14getGatheredIdsE10SizeType32">tensorrt_llm::runtime::GptDecoderBatched::getGatheredIds (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched14getGatheredIdsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched6getIdsE10SizeType32">tensorrt_llm::runtime::GptDecoderBatched::getIds (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched6getIdsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched11getLogProbsE10SizeType32">tensorrt_llm::runtime::GptDecoderBatched::getLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched11getLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched13getNbFinishedEv">tensorrt_llm::runtime::GptDecoderBatched::getNbFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched10getNbStepsEv">tensorrt_llm::runtime::GptDecoderBatched::getNbSteps (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched12getNewTokensE10SizeType32">tensorrt_llm::runtime::GptDecoderBatched::getNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched18getNextDraftTokensEv">tensorrt_llm::runtime::GptDecoderBatched::getNextDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched25getNextDraftTokensLengthsEv">tensorrt_llm::runtime::GptDecoderBatched::getNextDraftTokensLengths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched12getParentIdsEv">tensorrt_llm::runtime::GptDecoderBatched::getParentIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched25getPrevDraftTokensLengthsEv">tensorrt_llm::runtime::GptDecoderBatched::getPrevDraftTokensLengths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched17GptDecoderBatchedENSt6size_tENSt6size_tE13CudaStreamPtrRK23SpeculativeDecodingModeN8nvinfer18DataTypeE">tensorrt_llm::runtime::GptDecoderBatched::GptDecoderBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13GptDecoderPtrE">tensorrt_llm::runtime::GptDecoderBatched::GptDecoderPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16mActualBatchSizeE">tensorrt_llm::runtime::GptDecoderBatched::mActualBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched18mBatchSlotsDecoderE">tensorrt_llm::runtime::GptDecoderBatched::mBatchSlotsDecoder (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16mBatchSlotsSetupE">tensorrt_llm::runtime::GptDecoderBatched::mBatchSlotsSetup (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11mBeamWidthsE">tensorrt_llm::runtime::GptDecoderBatched::mBeamWidths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mBufferManagerE">tensorrt_llm::runtime::GptDecoderBatched::mBufferManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched15mCumLogProbsTmpE">tensorrt_llm::runtime::GptDecoderBatched::mCumLogProbsTmp (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched8mDecoderE">tensorrt_llm::runtime::GptDecoderBatched::mDecoder (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched19mDecoderFinishEventE">tensorrt_llm::runtime::GptDecoderBatched::mDecoderFinishEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mDecoderStreamE">tensorrt_llm::runtime::GptDecoderBatched::mDecoderStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13mDecodingModeE">tensorrt_llm::runtime::GptDecoderBatched::mDecodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched9mFinishedE">tensorrt_llm::runtime::GptDecoderBatched::mFinished (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mFinishedStepsE">tensorrt_llm::runtime::GptDecoderBatched::mFinishedSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched12mFinishedSumE">tensorrt_llm::runtime::GptDecoderBatched::mFinishedSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13mForwardEventE">tensorrt_llm::runtime::GptDecoderBatched::mForwardEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched19mJointDecodingInputE">tensorrt_llm::runtime::GptDecoderBatched::mJointDecodingInput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched20mJointDecodingOutputE">tensorrt_llm::runtime::GptDecoderBatched::mJointDecodingOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched19mMaxAttentionWindowE">tensorrt_llm::runtime::GptDecoderBatched::mMaxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched25mMaxDecodingDecoderTokensE">tensorrt_llm::runtime::GptDecoderBatched::mMaxDecodingDecoderTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched24mMaxDecodingEngineTokensE">tensorrt_llm::runtime::GptDecoderBatched::mMaxDecodingEngineTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13mMaxNewTokensE">tensorrt_llm::runtime::GptDecoderBatched::mMaxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched18mMaxSequenceLengthE">tensorrt_llm::runtime::GptDecoderBatched::mMaxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched8mNbStepsE">tensorrt_llm::runtime::GptDecoderBatched::mNbSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched24mNumDecodingEngineTokensE">tensorrt_llm::runtime::GptDecoderBatched::mNumDecodingEngineTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched7mNumSMsE">tensorrt_llm::runtime::GptDecoderBatched::mNumSMs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched21mOutputBeamHypothesesE">tensorrt_llm::runtime::GptDecoderBatched::mOutputBeamHypotheses (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mRuntimeStreamE">tensorrt_llm::runtime::GptDecoderBatched::mRuntimeStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16mSinkTokenLengthE">tensorrt_llm::runtime::GptDecoderBatched::mSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched24mSpeculativeDecodingModeE">tensorrt_llm::runtime::GptDecoderBatched::mSpeculativeDecodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched10mVocabSizeE">tensorrt_llm::runtime::GptDecoderBatched::mVocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16mVocabSizePaddedE">tensorrt_llm::runtime::GptDecoderBatched::mVocabSizePadded (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfigRK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::newBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched10newRequestE10SizeType32RKN13decoder_batch7RequestERK14SamplingConfigRK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::newRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched29newRequestDraftTokensExternalE10SizeType32RKN13decoder_batch7RequestERK14SamplingConfig">tensorrt_llm::runtime::GptDecoderBatched::newRequestDraftTokensExternal (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched15newRequestEagleE10SizeType32RKN13decoder_batch7RequestERK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::newRequestEagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched29newRequestExplicitDraftTokensE10SizeType32RKN13decoder_batch7RequestE">tensorrt_llm::runtime::GptDecoderBatched::newRequestExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched19newRequestLookaheadE10SizeType32RKN13decoder_batch7RequestE">tensorrt_llm::runtime::GptDecoderBatched::newRequestLookahead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16newRequestMedusaE10SizeType32RKN13decoder_batch7RequestE">tensorrt_llm::runtime::GptDecoderBatched::newRequestMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched11newRequestsERKNSt6vectorI10SizeType32EERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEERK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::newRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched29newRequestSpeculativeDecodingE10SizeType32RKN13decoder_batch7RequestERK14SamplingConfigRK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::newRequestSpeculativeDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched18postProcessRequestE10SizeType32RK14SamplingConfigb">tensorrt_llm::runtime::GptDecoderBatched::postProcessRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14setEagleInputsERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::setEagleInputs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched28setExplicitDraftTokensInputsERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatched::setExplicitDraftTokensInputs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched5setupERKN8executor12DecodingModeE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched10setupEagleEN12EagleBuffers6InputsE">tensorrt_llm::runtime::GptDecoderBatched::setupEagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched24setupExplicitDraftTokensEN26ExplicitDraftTokensBuffers6InputsE">tensorrt_llm::runtime::GptDecoderBatched::setupExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14setupLookaheadE24LookaheadDecodingBuffers">tensorrt_llm::runtime::GptDecoderBatched::setupLookahead (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14setupLookaheadERK11ModelConfig">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched24setupSpeculativeDecodingERK11ModelConfig">tensorrt_llm::runtime::GptDecoderBatched::setupSpeculativeDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14SharedConstPtrE">tensorrt_llm::runtime::GptDecoderBatched::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched9TensorPtrE">tensorrt_llm::runtime::GptDecoderBatched::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14updateFinishedERKN13decoder_batch20DecoderFinishedEventE">tensorrt_llm::runtime::GptDecoderBatched::updateFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE">tensorrt_llm::runtime::GptJsonConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig">tensorrt_llm::runtime::GptJsonConfig::engineFilename (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig21getContextParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getContextParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getGpusPerNodeEv">tensorrt_llm::runtime::GptJsonConfig::getGpusPerNode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv">tensorrt_llm::runtime::GptJsonConfig::getModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig21getModelConfigMutableEv">tensorrt_llm::runtime::GptJsonConfig::getModelConfigMutable (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv">tensorrt_llm::runtime::GptJsonConfig::getName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv">tensorrt_llm::runtime::GptJsonConfig::getPrecision (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig18getRuntimeDefaultsEv">tensorrt_llm::runtime::GptJsonConfig::getRuntimeDefaults (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv">tensorrt_llm::runtime::GptJsonConfig::getVersion (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv">tensorrt_llm::runtime::GptJsonConfig::getWorldSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE10SizeType3210SizeType3210SizeType3210SizeType3211ModelConfigNSt8optionalI15RuntimeDefaultsEE">tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig19mContextParallelismE">tensorrt_llm::runtime::GptJsonConfig::mContextParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mGpusPerNodeE">tensorrt_llm::runtime::GptJsonConfig::mGpusPerNode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mModelConfigE">tensorrt_llm::runtime::GptJsonConfig::mModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE">tensorrt_llm::runtime::GptJsonConfig::mName (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE">tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE">tensorrt_llm::runtime::GptJsonConfig::mPrecision (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig16mRuntimeDefaultsE">tensorrt_llm::runtime::GptJsonConfig::mRuntimeDefaults (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE">tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE">tensorrt_llm::runtime::GptJsonConfig::mVersion (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE">tensorrt_llm::runtime::GptJsonConfig::parse (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSessionE">tensorrt_llm::runtime::GptSession (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18BaseKVCacheManagerE">tensorrt_llm::runtime::GptSession::BaseKVCacheManager (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE">tensorrt_llm::runtime::GptSession::Config (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE10SizeType3210SizeType3210SizeType32f">tensorrt_llm::runtime::GptSession::Config::Config (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE">tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE">tensorrt_llm::runtime::GptSession::Config::cudaGraphMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE">tensorrt_llm::runtime::GptSession::Config::decoderPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12decodingModeE">tensorrt_llm::runtime::GptSession::Config::decodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE">tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17gpuWeightsPercentE">tensorrt_llm::runtime::GptSession::Config::gpuWeightsPercent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE">tensorrt_llm::runtime::GptSession::Config::kvCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE">tensorrt_llm::runtime::GptSession::Config::maxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE">tensorrt_llm::runtime::GptSession::Config::maxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE">tensorrt_llm::runtime::GptSession::Config::maxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17normalizeLogProbsE">tensorrt_llm::runtime::GptSession::Config::normalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE10SizeType32">tensorrt_llm::runtime::GptSession::createBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv">tensorrt_llm::runtime::GptSession::createContexts (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeEb10SizeType32RKN8executor12DecodingModeE">tensorrt_llm::runtime::GptSession::createDecoders (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RK13KvCacheConfig">tensorrt_llm::runtime::GptSession::createKvCacheManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput">tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE">tensorrt_llm::runtime::GptSession::CudaGraphExecutor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime10SizeType32">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE10SizeType3210SizeType32">tensorrt_llm::runtime::GptSession::decoderStepAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI10SizeType32EEPK18BaseKVCacheManager">tensorrt_llm::runtime::GptSession::executeContextStep (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE10SizeType32RKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI10SizeType32EEP18BaseKVCacheManagerRNSt6vectorIbEE">tensorrt_llm::runtime::GptSession::executeGenerationStep (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE10SizeType32RK14SamplingConfig">tensorrt_llm::runtime::GptSession::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE">tensorrt_llm::runtime::GptSession::generate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE">tensorrt_llm::runtime::GptSession::generateBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfilerE">tensorrt_llm::runtime::GptSession::GenerationProfiler (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler3endE">tensorrt_llm::runtime::GptSession::GenerationProfiler::end (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5flagsE">tensorrt_llm::runtime::GptSession::GenerationProfiler::flags (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler18GenerationProfilerEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::GenerationProfiler (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler16getElapsedTimeMsEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getElapsedTimeMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler6getEndEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getEnd (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler8getStartEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getStart (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5startE">tensorrt_llm::runtime::GptSession::GenerationProfiler::start (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv">tensorrt_llm::runtime::GptSession::getBufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv">tensorrt_llm::runtime::GptSession::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession18getEngineInspectorEv">tensorrt_llm::runtime::GptSession::getEngineInspector (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession19getLayerProfileInfoEv">tensorrt_llm::runtime::GptSession::getLayerProfileInfo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv">tensorrt_llm::runtime::GptSession::getLogger (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv">tensorrt_llm::runtime::GptSession::getLogitDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv">tensorrt_llm::runtime::GptSession::getModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession20getNormalizeLogProbsEv">tensorrt_llm::runtime::GptSession::getNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession19getRuntimeStreamPtrEv">tensorrt_llm::runtime::GptSession::getRuntimeStreamPtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv">tensorrt_llm::runtime::GptSession::getWorldConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK11ModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr">tensorrt_llm::runtime::GptSession::GptSession (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK11ModelConfigRK11WorldConfigRK9RawEngine9LoggerPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK11ModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK11ModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig10SizeType32">tensorrt_llm::runtime::GptSession::initDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::GptSession::kvCacheAddSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE">tensorrt_llm::runtime::GptSession::KvCacheConfig (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE">tensorrt_llm::runtime::GptSession::LoggerPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17mAllReduceBuffersE">tensorrt_llm::runtime::GptSession::mAllReduceBuffers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE">tensorrt_llm::runtime::GptSession::mBuffers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE">tensorrt_llm::runtime::GptSession::mCommEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE">tensorrt_llm::runtime::GptSession::mCommStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE">tensorrt_llm::runtime::GptSession::mCudaGraphInstances (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE">tensorrt_llm::runtime::GptSession::mCudaGraphMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE">tensorrt_llm::runtime::GptSession::mDecoderMaxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession29mDecoderMaxAttentionWindowVecE">tensorrt_llm::runtime::GptSession::mDecoderMaxAttentionWindowVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE">tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE">tensorrt_llm::runtime::GptSession::mDecoders (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE">tensorrt_llm::runtime::GptSession::mDecoderSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE">tensorrt_llm::runtime::GptSession::mDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE">tensorrt_llm::runtime::GptSession::MicroBatchConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE10SizeType3210SizeType32">tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE10SizeType3210SizeType32NSt8optionalI10SizeType32EENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE">tensorrt_llm::runtime::GptSession::mKvCacheManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE">tensorrt_llm::runtime::GptSession::mLogger (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE">tensorrt_llm::runtime::GptSession::mMicroBatchConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE">tensorrt_llm::runtime::GptSession::mModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18mNormalizeLogProbsE">tensorrt_llm::runtime::GptSession::mNormalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE">tensorrt_llm::runtime::GptSession::mPipelineComm (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE">tensorrt_llm::runtime::GptSession::mReceivedEvents (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE">tensorrt_llm::runtime::GptSession::mRuntime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE">tensorrt_llm::runtime::GptSession::mWorldConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16setLayerProfilerEv">tensorrt_llm::runtime::GptSession::setLayerProfiler (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config">tensorrt_llm::runtime::GptSession::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::GptSession::shouldStopSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession23shouldUseKVCacheManagerEv">tensorrt_llm::runtime::GptSession::shouldUseKVCacheManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE">tensorrt_llm::runtime::GptSession::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE">tensorrt_llm::runtime::GptSession::TokenGeneratedCallback (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv">tensorrt_llm::runtime::GptSession::useCudaGraphs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferE">tensorrt_llm::runtime::IBuffer (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">tensorrt_llm::runtime::IBuffer::data (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE">tensorrt_llm::runtime::IBuffer::DataType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv">tensorrt_llm::runtime::IBuffer::getCapacity (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv">tensorrt_llm::runtime::IBuffer::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv">tensorrt_llm::runtime::IBuffer::getDataTypeName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv">tensorrt_llm::runtime::IBuffer::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv">tensorrt_llm::runtime::IBuffer::getMemoryTypeName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv">tensorrt_llm::runtime::IBuffer::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv">tensorrt_llm::runtime::IBuffer::getSizeInBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer">tensorrt_llm::runtime::IBuffer::IBuffer (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv">tensorrt_llm::runtime::IBuffer::memoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer">tensorrt_llm::runtime::IBuffer::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv">tensorrt_llm::runtime::IBuffer::release (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE">tensorrt_llm::runtime::IBuffer::resize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE">tensorrt_llm::runtime::IBuffer::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE">tensorrt_llm::runtime::IBuffer::SharedPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE">tensorrt_llm::runtime::IBuffer::toBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE">tensorrt_llm::runtime::IBuffer::UniqueConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE">tensorrt_llm::runtime::IBuffer::UniquePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE">tensorrt_llm::runtime::IBuffer::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev">tensorrt_llm::runtime::IBuffer::~IBuffer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderE">tensorrt_llm::runtime::IGptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERKN8executor12DecodingModeEN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrERKNSt10shared_ptrIK25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::IGptDecoder::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forwardSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::IGptDecoder::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEERKNSt8optionalIKNSt6vectorIN13decoder_batch7RequestEEEEE">tensorrt_llm::runtime::IGptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder14TensorConstPtrE">tensorrt_llm::runtime::IGptDecoder::TensorConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE">tensorrt_llm::runtime::IGptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev">tensorrt_llm::runtime::IGptDecoder::~IGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatchedE">tensorrt_llm::runtime::IGptDecoderBatched (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13CudaStreamPtrE">tensorrt_llm::runtime::IGptDecoderBatched::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched23DecoderFinishedEventPtrE">tensorrt_llm::runtime::IGptDecoderBatched::DecoderFinishedEventPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched8finalizeE10SizeType32RK14SamplingConfigb">tensorrt_llm::runtime::IGptDecoderBatched::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatched::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatched::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched11forwardSyncERKN13decoder_batch20DecoderFinishedEventE">tensorrt_llm::runtime::IGptDecoderBatched::forwardSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched11forwardSyncERKN13decoder_batch20DecoderFinishedEventERN13decoder_batch6OutputERKN13decoder_batch5InputE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched24getAcceptedLengthsCumSumEv">tensorrt_llm::runtime::IGptDecoderBatched::getAcceptedLengthsCumSum (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched22getAcceptedPackedPathsEv">tensorrt_llm::runtime::IGptDecoderBatched::getAcceptedPackedPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched14getCumLogProbsE10SizeType32">tensorrt_llm::runtime::IGptDecoderBatched::getCumLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched14getCumLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched15getDecodingModeEv">tensorrt_llm::runtime::IGptDecoderBatched::getDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched11getFinishedEv">tensorrt_llm::runtime::IGptDecoderBatched::getFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched16getFinishReasonsEv">tensorrt_llm::runtime::IGptDecoderBatched::getFinishReasons (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched14getGatheredIdsE10SizeType32">tensorrt_llm::runtime::IGptDecoderBatched::getGatheredIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched6getIdsE10SizeType32">tensorrt_llm::runtime::IGptDecoderBatched::getIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched11getLogProbsE10SizeType32">tensorrt_llm::runtime::IGptDecoderBatched::getLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched11getLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched10getNbStepsEv">tensorrt_llm::runtime::IGptDecoderBatched::getNbSteps (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched18getNextDraftTokensEv">tensorrt_llm::runtime::IGptDecoderBatched::getNextDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched25getNextDraftTokensLengthsEv">tensorrt_llm::runtime::IGptDecoderBatched::getNextDraftTokensLengths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched12getParentIdsEv">tensorrt_llm::runtime::IGptDecoderBatched::getParentIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched25getPrevDraftTokensLengthsEv">tensorrt_llm::runtime::IGptDecoderBatched::getPrevDraftTokensLengths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched18IGptDecoderBatchedEv">tensorrt_llm::runtime::IGptDecoderBatched::IGptDecoderBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched11newRequestsERKNSt6vectorI10SizeType32EERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEERK11ModelConfig">tensorrt_llm::runtime::IGptDecoderBatched::newRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched10setupEagleEN12EagleBuffers6InputsE">tensorrt_llm::runtime::IGptDecoderBatched::setupEagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched24setupExplicitDraftTokensEN26ExplicitDraftTokensBuffers6InputsE">tensorrt_llm::runtime::IGptDecoderBatched::setupExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched14setupLookaheadE24LookaheadDecodingBuffers">tensorrt_llm::runtime::IGptDecoderBatched::setupLookahead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched9TensorPtrE">tensorrt_llm::runtime::IGptDecoderBatched::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryE">tensorrt_llm::runtime::IpcMemory (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfig">tensorrt_llm::runtime::IpcMemory::allocateIpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9BufferPtrE">tensorrt_llm::runtime::IpcMemory::BufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv">tensorrt_llm::runtime::IpcMemory::destroyIpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE">tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9IpcMemory11getCommPtrsEv">tensorrt_llm::runtime::IpcMemory::getCommPtrs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfigb">tensorrt_llm::runtime::IpcMemory::IpcMemory (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK9IpcMemory">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERR9IpcMemory">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory7mBufferE">tensorrt_llm::runtime::IpcMemory::mBuffer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE">tensorrt_llm::runtime::IpcMemory::mCommPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory8mOpenIpcE">tensorrt_llm::runtime::IpcMemory::mOpenIpc (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory7mTpRankE">tensorrt_llm::runtime::IpcMemory::mTpRank (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERK9IpcMemory">tensorrt_llm::runtime::IpcMemory::operator= (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERR9IpcMemory">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev">tensorrt_llm::runtime::IpcMemory::~IpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE">tensorrt_llm::runtime::IStatefulGptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE">tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeERK14SamplingConfig">tensorrt_llm::runtime::IStatefulGptDecoder::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE">tensorrt_llm::runtime::IStatefulGptDecoder::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE">tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv">tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv">tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getGatheredIdsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getGatheredIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder6getIdsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv">tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE10SizeType32">tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv">tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfigRK11ModelConfig">tensorrt_llm::runtime::IStatefulGptDecoder::newBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERKN8executor12DecodingModeE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfig">tensorrt_llm::runtime::IStatefulGptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE">tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev">tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorE">tensorrt_llm::runtime::ITensor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atE14UniqueConstPtrRR9TConstPtrRK5Shape">tensorrt_llm::runtime::ITensor::at (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atEN7ITensor14UniqueConstPtrERR9TConstPtrRKNSt16initializer_listI9DimType64EE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRKNSt16initializer_listI9DimType64EE">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t">tensorrt_llm::runtime::ITensor::castSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9DimType64E">tensorrt_llm::runtime::ITensor::DimType64 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8flattenNE9SharedPtrNSt7int64_tE">tensorrt_llm::runtime::ITensor::flattenN (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10SizeType32ENK12tensorrt_llm7runtime7ITensor12getDimensionE9DimType64v">tensorrt_llm::runtime::ITensor::getDimension (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv">tensorrt_llm::runtime::ITensor::getShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor">tensorrt_llm::runtime::ITensor::ITensor (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI9DimType64EE">tensorrt_llm::runtime::ITensor::makeShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor">tensorrt_llm::runtime::ITensor::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape">tensorrt_llm::runtime::ITensor::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE">tensorrt_llm::runtime::ITensor::resize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE">tensorrt_llm::runtime::ITensor::Shape (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T10SizeType32">tensorrt_llm::runtime::ITensor::shapeEquals (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T10SizeType32">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI10SizeType32EE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE">tensorrt_llm::runtime::ITensor::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE">tensorrt_llm::runtime::ITensor::SharedPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::ITensor::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5ShapeNSt6size_tE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EENSt6size_tE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape9DimType64">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE">[10]</a>, <a 
href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE9DimType64">[11]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE10SizeType32">tensorrt_llm::runtime::ITensor::squeeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape10SizeType32">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7stridesERK5Shape">tensorrt_llm::runtime::ITensor::strides (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9TensorMapE">tensorrt_llm::runtime::ITensor::TensorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape">tensorrt_llm::runtime::ITensor::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE">tensorrt_llm::runtime::ITensor::UniqueConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE">tensorrt_llm::runtime::ITensor::UniquePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE10SizeType32">tensorrt_llm::runtime::ITensor::unsqueeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape10SizeType32">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape">tensorrt_llm::runtime::ITensor::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape">tensorrt_llm::runtime::ITensor::volume (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape">tensorrt_llm::runtime::ITensor::volumeNonNegative (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape">tensorrt_llm::runtime::ITensor::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev">tensorrt_llm::runtime::ITensor::~ITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20lamportInitializeAllEPvPvPv6size_t">tensorrt_llm::runtime::lamportInitializeAll (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffersE">tensorrt_llm::runtime::LookaheadDecodingBuffers (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers17generationLengthsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::generationLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers7ITensorE">tensorrt_llm::runtime::LookaheadDecodingBuffers::ITensor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers24LookaheadDecodingBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerE">tensorrt_llm::runtime::LookaheadDecodingBuffers::LookaheadDecodingBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11packedMasksE">tensorrt_llm::runtime::LookaheadDecodingBuffers::packedMasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11positionIdsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::positionIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers15positionOffsetsE">tensorrt_llm::runtime::LookaheadDecodingBuffers::positionOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers10SizeType32E">tensorrt_llm::runtime::LookaheadDecodingBuffers::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers9TensorPtrE">tensorrt_llm::runtime::LookaheadDecodingBuffers::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModuleE">tensorrt_llm::runtime::LookaheadModule (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15LookaheadModule18getExecutionConfigEv">tensorrt_llm::runtime::LookaheadModule::getExecutionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleE10SizeType3210SizeType32">tensorrt_llm::runtime::LookaheadModule::LookaheadModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule16mExecutionConfigE">tensorrt_llm::runtime::LookaheadModule::mExecutionConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15LookaheadModule18setExecutionConfigERKN8executor23LookaheadDecodingConfigE">tensorrt_llm::runtime::LookaheadModule::setExecutionConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffersE">tensorrt_llm::runtime::LookaheadRuntimeBuffers (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18batchSlotsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::batchSlotsHostCopy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers12cumSumLengthE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::cumSumLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23generationLengthsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21generationLengthsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers25generationLengthsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::generationLengthsHostCopy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::insertInputTensors (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers7ITensorE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::ITensor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23LookaheadRuntimeBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN8executor14DecodingConfigERKN7runtime11TllmRuntimeE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::LookaheadRuntimeBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers14packedMaskHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMaskHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18packedMaskHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMaskHostCopy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17packedMasksDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::packedMasksDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17positionIdsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers15positionIdsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionIdsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionIdsHostCopy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21positionOffsetsDeviceE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionOffsetsHostE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23positionOffsetsHostCopyE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::positionOffsetsHostCopy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers7reshapeE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LookaheadRuntimeBuffers::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers13setFromInputsE10SizeType3210SizeType32RKN7runtime7ITensorERK7ITensorRK24LookaheadDecodingBuffersRKN7runtime11TllmRuntimeERKN7runtime11ModelConfigERKN7runtime11WorldConfigE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::setFromInputs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers10SizeType32E">tensorrt_llm::runtime::LookaheadRuntimeBuffers::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorMapE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::TensorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorPtrE">tensorrt_llm::runtime::LookaheadRuntimeBuffers::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCacheE">tensorrt_llm::runtime::LoraCache (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType">tensorrt_llm::runtime::LoraCache::bump (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType">tensorrt_llm::runtime::LoraCache::bumpTaskInProgress (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE10SizeType32">tensorrt_llm::runtime::LoraCache::claimPagesWithEvict (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb">tensorrt_llm::runtime::LoraCache::copyTask (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache">tensorrt_llm::runtime::LoraCache::copyTaskMapPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK11ModelConfigRK11WorldConfigNSt13unordered_mapI10SizeType3210LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCache::copyToPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType">tensorrt_llm::runtime::LoraCache::determineNumPages (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr">tensorrt_llm::runtime::LoraCache::fits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType">tensorrt_llm::runtime::LoraCache::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv">tensorrt_llm::runtime::LoraCache::getNumPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t">tensorrt_llm::runtime::LoraCache::getPagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType">tensorrt_llm::runtime::LoraCache::getStatus (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType">tensorrt_llm::runtime::LoraCache::has (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::isDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType">tensorrt_llm::runtime::LoraCache::isLoaded (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr">tensorrt_llm::runtime::LoraCache::loadWeights (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK11ModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::LoraCache::LoraCache (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv">tensorrt_llm::runtime::LoraCache::markAllDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::markTaskDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE">tensorrt_llm::runtime::LoraCache::mBufferManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE">tensorrt_llm::runtime::LoraCache::mCacheMap (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE">tensorrt_llm::runtime::LoraCache::mCacheMutex (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE">tensorrt_llm::runtime::LoraCache::mCachePageManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE">tensorrt_llm::runtime::LoraCache::mDeviceBufferManagers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE">tensorrt_llm::runtime::LoraCache::mDoneTasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE">tensorrt_llm::runtime::LoraCache::mInProgressTasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE">tensorrt_llm::runtime::LoraCache::mModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE">tensorrt_llm::runtime::LoraCache::mModuleIdToModule (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE">tensorrt_llm::runtime::LoraCache::mPageManagerConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE">tensorrt_llm::runtime::LoraCache::mPagesMutex (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE">tensorrt_llm::runtime::LoraCache::mWorldConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb">tensorrt_llm::runtime::LoraCache::put (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor10SizeType3210SizeType32">tensorrt_llm::runtime::LoraCache::splitTransposeCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor10SizeType3210SizeType32">tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE">tensorrt_llm::runtime::LoraCache::TaskIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::adapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::inSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::layerId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::moduleId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::numSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::outSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::pageId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::slotIdx (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsInPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsOutPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfigListPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE">tensorrt_llm::runtime::LoraCache::TaskValue (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE">tensorrt_llm::runtime::LoraCache::TaskValue::configs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE">tensorrt_llm::runtime::LoraCache::TaskValue::done (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::inProgress (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE">tensorrt_llm::runtime::LoraCache::TaskValue::it (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE">tensorrt_llm::runtime::LoraCache::TaskValue::loaded (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::loadInProgress (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue">tensorrt_llm::runtime::LoraCache::TaskValue::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE">tensorrt_llm::runtime::LoraCache::TaskValue::pageIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb">tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev">tensorrt_llm::runtime::LoraCache::TaskValue::~TaskValue (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE">tensorrt_llm::runtime::LoraCache::TaskValuePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE">tensorrt_llm::runtime::LoraCache::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE">tensorrt_llm::runtime::LoraCache::ValueStatus (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_LOADED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_MISSING (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_PROCESSING (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionE">tensorrt_llm::runtime::LoraCacheFullException (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullException22LoraCacheFullExceptionERKNSt6stringE">tensorrt_llm::runtime::LoraCacheFullException::LoraCacheFullException (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionD0Ev">tensorrt_llm::runtime::LoraCacheFullException::~LoraCacheFullException (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE">tensorrt_llm::runtime::LoraCachePageManager (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE10SizeType32">tensorrt_llm::runtime::LoraCachePageManager::blockPtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE10SizeType32">tensorrt_llm::runtime::LoraCachePageManager::claimPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::initialize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE">tensorrt_llm::runtime::LoraCachePageManager::mConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE">tensorrt_llm::runtime::LoraCachePageManager::mFreePageIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE">tensorrt_llm::runtime::LoraCachePageManager::mIsPageFree (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE">tensorrt_llm::runtime::LoraCachePageManager::mPageBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::mutablePagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv">tensorrt_llm::runtime::LoraCachePageManager::numAvailablePages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::pagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCachePageManager::releasePages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE">tensorrt_llm::runtime::LoraCachePageManager::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE">tensorrt_llm::runtime::LoraCachePageManagerConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getInitToZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMaxPagesPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getPageWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getSlotsPerPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getTotalNumPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mInitToZero (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMaxPagesPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMemoryType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mNumCopyStreams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mPageWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mSlotsPerPage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mTotalNumPages (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb">tensorrt_llm::runtime::LoraCachePageManagerConfig::setInitToZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMaxPagesPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setPageWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setSlotsPerPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK10SizeType32">tensorrt_llm::runtime::LoraCachePageManagerConfig::setTotalNumPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionE">tensorrt_llm::runtime::LoraExpectedException (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedException21LoraExpectedExceptionERKNSt6stringE">tensorrt_llm::runtime::LoraExpectedException::LoraExpectedException (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionD0Ev">tensorrt_llm::runtime::LoraExpectedException::~LoraExpectedException (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleE">tensorrt_llm::runtime::LoraModule (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::createLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE10SizeType32">tensorrt_llm::runtime::LoraModule::flattenedInOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv">tensorrt_llm::runtime::LoraModule::inDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv">tensorrt_llm::runtime::LoraModule::inDimFirst (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE10SizeType32">tensorrt_llm::runtime::LoraModule::inSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv">tensorrt_llm::runtime::LoraModule::inTpSplitDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE10SizeType32">tensorrt_llm::runtime::LoraModule::localInDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localInSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localOutAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE10SizeType32">tensorrt_llm::runtime::LoraModule::localOutDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE10SizeType3210SizeType32">tensorrt_llm::runtime::LoraModule::localOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule">tensorrt_llm::runtime::LoraModule::LoraModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType10SizeType3210SizeType32bb10SizeType3210SizeType32">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE">tensorrt_llm::runtime::LoraModule::mInDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE">tensorrt_llm::runtime::LoraModule::mInDimFirst (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE">tensorrt_llm::runtime::LoraModule::mInTpSplitDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE">tensorrt_llm::runtime::LoraModule::ModuleType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_DENSE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_K (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_Q (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_QKV (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_V (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_DENSE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_K (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_Q (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_QKV (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_V (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE">tensorrt_llm::runtime::LoraModule::ModuleType::kINVALID (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_4H_TO_H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_GATE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_H_TO_4H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMLP_ROUTERE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_ROUTER (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_4H_TO_HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_4H_TO_H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMOE_GATEE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_GATE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_H_TO_4HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_H_TO_4H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMOE_ROUTERE">tensorrt_llm::runtime::LoraModule::ModuleType::kMOE_ROUTER (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE">tensorrt_llm::runtime::LoraModule::mOutDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE">tensorrt_llm::runtime::LoraModule::mOutDimFirst (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE">tensorrt_llm::runtime::LoraModule::mOutTpSplitDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE">tensorrt_llm::runtime::LoraModule::mType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv">tensorrt_llm::runtime::LoraModule::name (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule">tensorrt_llm::runtime::LoraModule::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv">tensorrt_llm::runtime::LoraModule::outDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv">tensorrt_llm::runtime::LoraModule::outDimFirst (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE10SizeType32">tensorrt_llm::runtime::LoraModule::outSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv">tensorrt_llm::runtime::LoraModule::outTpSplitDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE">tensorrt_llm::runtime::LoraModule::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType">tensorrt_llm::runtime::LoraModule::toModuleName (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10SizeType32">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE">tensorrt_llm::runtime::LoraModule::toModuleType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv">tensorrt_llm::runtime::LoraModule::value (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14LoraTaskIdTypeE">tensorrt_llm::runtime::LoraTaskIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModuleE">tensorrt_llm::runtime::MedusaModule (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12MedusaModule16getMedusaChoicesEv">tensorrt_llm::runtime::MedusaModule::getMedusaChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule21mDefaultMedusaChoicesE">tensorrt_llm::runtime::MedusaModule::mDefaultMedusaChoices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule13MedusaChoicesE">tensorrt_llm::runtime::MedusaModule::MedusaChoices (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleE10SizeType3210SizeType32">tensorrt_llm::runtime::MedusaModule::MedusaModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12MedusaModule9TensorPtrE">tensorrt_llm::runtime::MedusaModule::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCountersE">tensorrt_llm::runtime::MemoryCounters (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv10SizeType32">tensorrt_llm::runtime::MemoryCounters::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType10SizeType32">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE10SizeType32i">tensorrt_llm::runtime::MemoryCounters::bytesToString (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv10SizeType32">tensorrt_llm::runtime::MemoryCounters::deallocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType10SizeType32">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE">tensorrt_llm::runtime::MemoryCounters::DiffType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv">tensorrt_llm::runtime::MemoryCounters::getCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getCpuDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv">tensorrt_llm::runtime::MemoryCounters::getGpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getGpuDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv">tensorrt_llm::runtime::MemoryCounters::getInstance (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv">tensorrt_llm::runtime::MemoryCounters::getPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv">tensorrt_llm::runtime::MemoryCounters::getPinnedDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedPoolEv">tensorrt_llm::runtime::MemoryCounters::getPinnedPool (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters17getPinnedPoolDiffEv">tensorrt_llm::runtime::MemoryCounters::getPinnedPoolDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv">tensorrt_llm::runtime::MemoryCounters::getUVM (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv">tensorrt_llm::runtime::MemoryCounters::getUVMDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE">tensorrt_llm::runtime::MemoryCounters::mCpu (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE">tensorrt_llm::runtime::MemoryCounters::mCpuDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv">tensorrt_llm::runtime::MemoryCounters::MemoryCounters (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE">tensorrt_llm::runtime::MemoryCounters::mGpu (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE">tensorrt_llm::runtime::MemoryCounters::mGpuDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE">tensorrt_llm::runtime::MemoryCounters::mPinned (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE">tensorrt_llm::runtime::MemoryCounters::mPinnedDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedPoolE">tensorrt_llm::runtime::MemoryCounters::mPinnedPool (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters15mPinnedPoolDiffE">tensorrt_llm::runtime::MemoryCounters::mPinnedPoolDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME">tensorrt_llm::runtime::MemoryCounters::mUVM (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE">tensorrt_llm::runtime::MemoryCounters::mUVMDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters10SizeType32E">tensorrt_llm::runtime::MemoryCounters::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv">tensorrt_llm::runtime::MemoryCounters::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryTypeE">tensorrt_llm::runtime::MemoryType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE">tensorrt_llm::runtime::MemoryType::kCPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE">tensorrt_llm::runtime::MemoryType::kGPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE">tensorrt_llm::runtime::MemoryType::kPINNED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType11kPINNEDPOOLE">tensorrt_llm::runtime::MemoryType::kPINNEDPOOL (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME">tensorrt_llm::runtime::MemoryType::kUVM (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE">tensorrt_llm::runtime::MemoryTypeString (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNEDPOOL&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNEDPOOL&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfigE">tensorrt_llm::runtime::ModelConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEb">tensorrt_llm::runtime::ModelConfig::computeContextLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEb">tensorrt_llm::runtime::ModelConfig::computeGenerationLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16countLocalLayersE9LayerType10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::countLocalLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20countLowerRankLayersE9LayerType10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::countLowerRankLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getContextFMHAEv">tensorrt_llm::runtime::ModelConfig::getContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getDataTypeEv">tensorrt_llm::runtime::ModelConfig::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getEncoderHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getEncoderHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getKVCacheTypeEv">tensorrt_llm::runtime::ModelConfig::getKVCacheType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getKvDataTypeEv">tensorrt_llm::runtime::ModelConfig::getKvDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getLayerTypesEv">tensorrt_llm::runtime::ModelConfig::getLayerTypes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLogitsDtypeEv">tensorrt_llm::runtime::ModelConfig::getLogitsDtype (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLoraModulesEv">tensorrt_llm::runtime::ModelConfig::getLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getManageWeightsTypeEv">tensorrt_llm::runtime::ModelConfig::getManageWeightsType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBatchSizeEv">tensorrt_llm::runtime::ModelConfig::getMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBeamWidthEv">tensorrt_llm::runtime::ModelConfig::getMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig25getMaxDecodingDraftTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxDecodingDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getMaxDecodingTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxDecodingTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMaxEncoderLenEv">tensorrt_llm::runtime::ModelConfig::getMaxEncoderLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxInputLenEv">tensorrt_llm::runtime::ModelConfig::getMaxInputLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxLoraRankEv">tensorrt_llm::runtime::ModelConfig::getMaxLoraRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxNumTokensEv">tensorrt_llm::runtime::ModelConfig::getMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig24getMaxPositionEmbeddingsEv">tensorrt_llm::runtime::ModelConfig::getMaxPositionEmbeddings (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig30getMaxPromptEmbeddingTableSizeEv">tensorrt_llm::runtime::ModelConfig::getMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getMaxSequenceLenEv">tensorrt_llm::runtime::ModelConfig::getMaxSequenceLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMlpHiddenSizeEv">tensorrt_llm::runtime::ModelConfig::getMlpHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getModelNameEv">tensorrt_llm::runtime::ModelConfig::getModelName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getModelVariantEv">tensorrt_llm::runtime::ModelConfig::getModelVariant (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getNbAttentionLayersE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getNbAttentionLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig10getNbHeadsEv">tensorrt_llm::runtime::ModelConfig::getNbHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getNbKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::getNbKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getNbLayersE10SizeType32">tensorrt_llm::runtime::ModelConfig::getNbLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getNbRnnLayersE10SizeType3210SizeType32">tensorrt_llm::runtime::ModelConfig::getNbRnnLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getNumKvHeadsPerLayerEv">tensorrt_llm::runtime::ModelConfig::getNumKvHeadsPerLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getNumKvHeadsPerLayerLocalRangeE10SizeType3210SizeType32b">tensorrt_llm::runtime::ModelConfig::getNumKvHeadsPerLayerLocalRange (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig25getOptProfilesSplitPointsEv">tensorrt_llm::runtime::ModelConfig::getOptProfilesSplitPoints (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19getPagedContextFMHAEv">tensorrt_llm::runtime::ModelConfig::getPagedContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getPpReduceScatterEv">tensorrt_llm::runtime::ModelConfig::getPpReduceScatter (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getQuantModeEv">tensorrt_llm::runtime::ModelConfig::getQuantMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getRnnConfigEv">tensorrt_llm::runtime::ModelConfig::getRnnConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getRotaryEmbeddingDimEv">tensorrt_llm::runtime::ModelConfig::getRotaryEmbeddingDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getSizePerHeadEv">tensorrt_llm::runtime::ModelConfig::getSizePerHead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig26getSpeculativeDecodingModeEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig28getSpeculativeDecodingModuleEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv">tensorrt_llm::runtime::ModelConfig::getSpeculativeDecodingModulePtr (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getSumLocalKvHeadsE10SizeType3210SizeType32b">tensorrt_llm::runtime::ModelConfig::getSumLocalKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getTokensPerBlockEv">tensorrt_llm::runtime::ModelConfig::getTokensPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getVocabSizeEv">tensorrt_llm::runtime::ModelConfig::getVocabSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getVocabSizePaddedE10SizeType32">tensorrt_llm::runtime::ModelConfig::getVocabSizePadded (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig12hasRnnConfigEv">tensorrt_llm::runtime::ModelConfig::hasRnnConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig28hasSpeculativeDecodingModuleEv">tensorrt_llm::runtime::ModelConfig::hasSpeculativeDecodingModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19isContinuousKVCacheEv">tensorrt_llm::runtime::ModelConfig::isContinuousKVCache (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig16isKVCacheEnabledEv">tensorrt_llm::runtime::ModelConfig::isKVCacheEnabled (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14isPagedKVCacheEv">tensorrt_llm::runtime::ModelConfig::isPagedKVCache (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig10isRnnBasedEv">tensorrt_llm::runtime::ModelConfig::isRnnBased (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig18isTransformerBasedEv">tensorrt_llm::runtime::ModelConfig::isTransformerBased (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig29kDEFAULT_NUM_TOKENS_PER_BLOCKE">tensorrt_llm::runtime::ModelConfig::kDEFAULT_NUM_TOKENS_PER_BLOCK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26kOPT_PROFILES_SPLIT_POINTSE">tensorrt_llm::runtime::ModelConfig::kOPT_PROFILES_SPLIT_POINTS (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheTypeE">tensorrt_llm::runtime::ModelConfig::KVCacheType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType11kCONTINUOUSE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kCONTINUOUS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType9kDISABLEDE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kDISABLED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType6kPAGEDE">tensorrt_llm::runtime::ModelConfig::KVCacheType::kPAGED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21KVCacheTypeFromStringENSt6stringE">tensorrt_llm::runtime::ModelConfig::KVCacheTypeFromString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerTypeE">tensorrt_llm::runtime::ModelConfig::LayerType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kATTENTIONE">tensorrt_llm::runtime::ModelConfig::LayerType::kATTENTION (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType7kLINEARE">tensorrt_llm::runtime::ModelConfig::LayerType::kLINEAR (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType5kNOOPE">tensorrt_llm::runtime::ModelConfig::LayerType::kNOOP (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kRECURRENTE">tensorrt_llm::runtime::ModelConfig::LayerType::kRECURRENT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsTypeE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType9kDisabledE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType::kDisabled (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType8kEnabledE">tensorrt_llm::runtime::ModelConfig::ManageWeightsType::kEnabled (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mComputeContextLogitsE">tensorrt_llm::runtime::ModelConfig::mComputeContextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24mComputeGenerationLogitsE">tensorrt_llm::runtime::ModelConfig::mComputeGenerationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mContextFMHAE">tensorrt_llm::runtime::ModelConfig::mContextFMHA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mDataTypeE">tensorrt_llm::runtime::ModelConfig::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mEncoderHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mEncoderHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mInputPackedE">tensorrt_llm::runtime::ModelConfig::mInputPacked (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mKVCacheTypeE">tensorrt_llm::runtime::ModelConfig::mKVCacheType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mLayerTypesE">tensorrt_llm::runtime::ModelConfig::mLayerTypes (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLogitsDtypeE">tensorrt_llm::runtime::ModelConfig::mLogitsDtype (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLoraModulesE">tensorrt_llm::runtime::ModelConfig::mLoraModules (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mManageWeightsTypeE">tensorrt_llm::runtime::ModelConfig::mManageWeightsType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBatchSizeE">tensorrt_llm::runtime::ModelConfig::mMaxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBeamWidthE">tensorrt_llm::runtime::ModelConfig::mMaxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMaxEncoderLenE">tensorrt_llm::runtime::ModelConfig::mMaxEncoderLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxInputLenE">tensorrt_llm::runtime::ModelConfig::mMaxInputLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxLoraRankE">tensorrt_llm::runtime::ModelConfig::mMaxLoraRank (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxNumTokensE">tensorrt_llm::runtime::ModelConfig::mMaxNumTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mMaxPositionEmbeddingsE">tensorrt_llm::runtime::ModelConfig::mMaxPositionEmbeddings (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28mMaxPromptEmbeddingTableSizeE">tensorrt_llm::runtime::ModelConfig::mMaxPromptEmbeddingTableSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15mMaxSequenceLenE">tensorrt_llm::runtime::ModelConfig::mMaxSequenceLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMlpHiddenSizeE">tensorrt_llm::runtime::ModelConfig::mMlpHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mModelNameE">tensorrt_llm::runtime::ModelConfig::mModelName (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13mModelVariantE">tensorrt_llm::runtime::ModelConfig::mModelVariant (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mNbAttentionLayersE">tensorrt_llm::runtime::ModelConfig::mNbAttentionLayers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig8mNbHeadsE">tensorrt_llm::runtime::ModelConfig::mNbHeads (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mNbLayersE">tensorrt_llm::runtime::ModelConfig::mNbLayers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mNbRnnLayersE">tensorrt_llm::runtime::ModelConfig::mNbRnnLayers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28mNumKvHeadsPerAttentionLayerE">tensorrt_llm::runtime::ModelConfig::mNumKvHeadsPerAttentionLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig33mNumKvHeadsPerCrossAttentionLayerE">tensorrt_llm::runtime::ModelConfig::mNumKvHeadsPerCrossAttentionLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11ModelConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeE">tensorrt_llm::runtime::ModelConfig::ModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariantE">tensorrt_llm::runtime::ModelConfig::ModelVariant (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant8kChatGlmE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kChatGlm (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant7kEncDecE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kEncDec (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGlmE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kGlm (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGptE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kGpt (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant6kMambaE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kMamba (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant15kRecurrentGemmaE">tensorrt_llm::runtime::ModelConfig::ModelVariant::kRecurrentGemma (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17mPagedContextFMHAE">tensorrt_llm::runtime::ModelConfig::mPagedContextFMHA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11mPagedStateE">tensorrt_llm::runtime::ModelConfig::mPagedState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16mPpReduceScatterE">tensorrt_llm::runtime::ModelConfig::mPpReduceScatter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mQuantModeE">tensorrt_llm::runtime::ModelConfig::mQuantMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mRnnConfigE">tensorrt_llm::runtime::ModelConfig::mRnnConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig19mRotaryEmbeddingDimE">tensorrt_llm::runtime::ModelConfig::mRotaryEmbeddingDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12mSizePerHeadE">tensorrt_llm::runtime::ModelConfig::mSizePerHead (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20mSkipCrossAttnBlocksE">tensorrt_llm::runtime::ModelConfig::mSkipCrossAttnBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24mSpeculativeDecodingModeE">tensorrt_llm::runtime::ModelConfig::mSpeculativeDecodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26mSpeculativeDecodingModuleE">tensorrt_llm::runtime::ModelConfig::mSpeculativeDecodingModule (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15mTokensPerBlockE">tensorrt_llm::runtime::ModelConfig::mTokensPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseCrossAttentionE">tensorrt_llm::runtime::ModelConfig::mUseCrossAttention (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseGptAttentionPluginE">tensorrt_llm::runtime::ModelConfig::mUseGptAttentionPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14mUseLoraPluginE">tensorrt_llm::runtime::ModelConfig::mUseLoraPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUseMambaConv1dPluginE">tensorrt_llm::runtime::ModelConfig::mUseMambaConv1dPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9mUseMropeE">tensorrt_llm::runtime::ModelConfig::mUseMrope (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUsePositionEmbeddingE">tensorrt_llm::runtime::ModelConfig::mUsePositionEmbedding (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseShapeInferenceE">tensorrt_llm::runtime::ModelConfig::mUseShapeInference (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseTokenTypeEmbeddingE">tensorrt_llm::runtime::ModelConfig::mUseTokenTypeEmbedding (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig7mUseXQAE">tensorrt_llm::runtime::ModelConfig::mUseXQA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig10mVocabSizeE">tensorrt_llm::runtime::ModelConfig::mVocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfigE">tensorrt_llm::runtime::ModelConfig::RnnConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig10convKernelE">tensorrt_llm::runtime::ModelConfig::RnnConfig::convKernel (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig14rnnConvDimSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnConvDimSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig11rnnHeadSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnHeadSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig13rnnHiddenSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::rnnHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig9stateSizeE">tensorrt_llm::runtime::ModelConfig::RnnConfig::stateSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setContextFMHAEb">tensorrt_llm::runtime::ModelConfig::setContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setEncoderHiddenSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setEncoderHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setKVCacheTypeE11KVCacheType">tensorrt_llm::runtime::ModelConfig::setKVCacheType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13setLayerTypesERKNSt6vectorI9LayerTypeEE">tensorrt_llm::runtime::ModelConfig::setLayerTypes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLogitsDtypeEN8nvinfer18DataTypeE">tensorrt_llm::runtime::ModelConfig::setLogitsDtype (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE">tensorrt_llm::runtime::ModelConfig::setLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setManageWeightsTypeEK17ManageWeightsType">tensorrt_llm::runtime::ModelConfig::setManageWeightsType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBatchSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBeamWidthE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMaxEncoderLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxEncoderLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxInputLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxInputLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxLoraRankE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxLoraRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxNumTokensENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24setMaxPositionEmbeddingsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxPositionEmbeddings (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig30setMaxPromptEmbeddingTableSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setMaxSequenceLenE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMaxSequenceLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMlpHiddenSizeE10SizeType32">tensorrt_llm::runtime::ModelConfig::setMlpHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setModelNameERKNSt6stringE">tensorrt_llm::runtime::ModelConfig::setModelName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig15setModelVariantE12ModelVariant">tensorrt_llm::runtime::ModelConfig::setModelVariant (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setNbCrossKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setNbCrossKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setNbKvHeadsE10SizeType32">tensorrt_llm::runtime::ModelConfig::setNbKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26setNumKvHeadsPerCrossLayerERKNSt6vectorI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setNumKvHeadsPerCrossLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21setNumKvHeadsPerLayerERKNSt6vectorI10SizeType32EE">tensorrt_llm::runtime::ModelConfig::setNumKvHeadsPerLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig19setPagedContextFMHAEb">tensorrt_llm::runtime::ModelConfig::setPagedContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig18setPpReduceScatterEb">tensorrt_llm::runtime::ModelConfig::setPpReduceScatter (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setQuantModeEN6common9QuantModeE">tensorrt_llm::runtime::ModelConfig::setQuantMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig12setRnnConfigERK9RnnConfig">tensorrt_llm::runtime::ModelConfig::setRnnConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21setRotaryEmbeddingDimE10SizeType32">tensorrt_llm::runtime::ModelConfig::setRotaryEmbeddingDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14setSizePerHeadE10SizeType32">tensorrt_llm::runtime::ModelConfig::setSizePerHead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig22setSkipCrossAttnBlocksEb">tensorrt_llm::runtime::ModelConfig::setSkipCrossAttnBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig26setSpeculativeDecodingModeE23SpeculativeDecodingMode">tensorrt_llm::runtime::ModelConfig::setSpeculativeDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig28setSpeculativeDecodingModuleERKNSt10shared_ptrI25SpeculativeDecodingModuleEE">tensorrt_llm::runtime::ModelConfig::setSpeculativeDecodingModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig17setTokensPerBlockE10SizeType32">tensorrt_llm::runtime::ModelConfig::setTokensPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseCrossAttentionEb">tensorrt_llm::runtime::ModelConfig::setUseCrossAttention (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig11setUseMropeEb">tensorrt_llm::runtime::ModelConfig::setUseMrope (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig23setUsePositionEmbeddingEb">tensorrt_llm::runtime::ModelConfig::setUsePositionEmbedding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseShapeInferenceEb">tensorrt_llm::runtime::ModelConfig::setUseShapeInference (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig24setUseTokenTypeEmbeddingEb">tensorrt_llm::runtime::ModelConfig::setUseTokenTypeEmbedding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig19skipCrossAttnBlocksEv">tensorrt_llm::runtime::ModelConfig::skipCrossAttnBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig24supportsInflightBatchingEv">tensorrt_llm::runtime::ModelConfig::supportsInflightBatching (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useCrossAttentionEv">tensorrt_llm::runtime::ModelConfig::useCrossAttention (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEb">tensorrt_llm::runtime::ModelConfig::useGptAttentionPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13useLoraPluginEb">tensorrt_llm::runtime::ModelConfig::useLoraPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13useLoraPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEb">tensorrt_llm::runtime::ModelConfig::useMambaConv1dPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig8useMropeEv">tensorrt_llm::runtime::ModelConfig::useMrope (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig14usePackedInputEb">tensorrt_llm::runtime::ModelConfig::usePackedInput (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig14usePackedInputEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11ModelConfig13usePagedStateEb">tensorrt_llm::runtime::ModelConfig::usePagedState (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig13usePagedStateEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig20usePositionEmbeddingEv">tensorrt_llm::runtime::ModelConfig::usePositionEmbedding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig15usePromptTuningEv">tensorrt_llm::runtime::ModelConfig::usePromptTuning (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useShapeInferenceEv">tensorrt_llm::runtime::ModelConfig::useShapeInference (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useTokenTypeEmbeddingEv">tensorrt_llm::runtime::ModelConfig::useTokenTypeEmbedding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule">tensorrt_llm::runtime::operator&lt;&lt; (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE">[5]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">tensorrt_llm::runtime::PhonyNameDueToError::name (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">tensorrt_llm::runtime::PhonyNameDueToError::size (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">tensorrt_llm::runtime::PhonyNameDueToError::type (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">tensorrt_llm::runtime::PhonyNameDueToError::value (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[13]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE">tensorrt_llm::runtime::PointerElementType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE">tensorrt_llm::runtime::PromptTuningParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK10SizeType32K10SizeType32RKNSt6vectorI10SizeType32EERKNSt6vectorI10SizeType32EERK13BufferManagerb">tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams10SizeType32E">tensorrt_llm::runtime::PromptTuningParams::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE">tensorrt_llm::runtime::PromptTuningParams::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngineE">tensorrt_llm::runtime::RawEngine (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngineUt1_1E">tensorrt_llm::runtime::RawEngine::[anonymous] (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine10getAddressEv">tensorrt_llm::runtime::RawEngine::getAddress (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine13getHostMemoryEv">tensorrt_llm::runtime::RawEngine::getHostMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine23getManagedWeightsMapOptEv">tensorrt_llm::runtime::RawEngine::getManagedWeightsMapOpt (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getPathEv">tensorrt_llm::runtime::RawEngine::getPath (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine10getPathOptEv">tensorrt_llm::runtime::RawEngine::getPathOpt (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getSizeEv">tensorrt_llm::runtime::RawEngine::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9RawEngine7getTypeEv">tensorrt_llm::runtime::RawEngine::getType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineAddrE">tensorrt_llm::runtime::RawEngine::mEngineAddr (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine13mEngineBufferE">tensorrt_llm::runtime::RawEngine::mEngineBuffer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEnginePathE">tensorrt_llm::runtime::RawEngine::mEnginePath (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineSizeE">tensorrt_llm::runtime::RawEngine::mEngineSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine18mManagedWeightsMapE">tensorrt_llm::runtime::RawEngine::mManagedWeightsMap (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine5mTypeE">tensorrt_llm::runtime::RawEngine::mType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineENSt10filesystem4pathE">tensorrt_llm::runtime::RawEngine::RawEngine (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKN8nvinfer111IHostMemoryE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKvNSt6size_tE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine20setManagedWeightsMapENSt3mapINSt6stringEN12tensorrt_llm8executor6TensorEEE">tensorrt_llm::runtime::RawEngine::setManagedWeightsMap (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine7setPathENSt10filesystem4pathE">tensorrt_llm::runtime::RawEngine::setPath (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4TypeE">tensorrt_llm::runtime::RawEngine::Type (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type15AddressWithSizeE">tensorrt_llm::runtime::RawEngine::Type::AddressWithSize (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type8FilePathE">tensorrt_llm::runtime::RawEngine::Type::FilePath (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9RawEngine4Type10HostMemoryE">tensorrt_llm::runtime::RawEngine::Type::HostMemory (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestTypeE">tensorrt_llm::runtime::RequestType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestType8kCONTEXTE">tensorrt_llm::runtime::RequestType::kCONTEXT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11RequestType11kGENERATIONE">tensorrt_llm::runtime::RequestType::kGENERATION (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaultsE">tensorrt_llm::runtime::RuntimeDefaults (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults21maxAttentionWindowVecE">tensorrt_llm::runtime::RuntimeDefaults::maxAttentionWindowVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalI10SizeType32EE">tensorrt_llm::runtime::RuntimeDefaults::RuntimeDefaults (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15sinkTokenLengthE">tensorrt_llm::runtime::RuntimeDefaults::sinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfigE">tensorrt_llm::runtime::SamplingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE">tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE">tensorrt_llm::runtime::SamplingConfig::beamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig11cumLogProbsE">tensorrt_llm::runtime::SamplingConfig::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE">tensorrt_llm::runtime::SamplingConfig::draftAcceptanceThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE">tensorrt_llm::runtime::SamplingConfig::earlyStopping (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE">tensorrt_llm::runtime::SamplingConfig::FloatType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE">tensorrt_llm::runtime::SamplingConfig::frequencyPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE6size_tEEE1T">tensorrt_llm::runtime::SamplingConfig::fuseValues (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfig17getNumReturnBeamsEv">tensorrt_llm::runtime::SamplingConfig::getNumReturnBeams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE">tensorrt_llm::runtime::SamplingConfig::lengthPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE">tensorrt_llm::runtime::SamplingConfig::minLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17noRepeatNgramSizeE">tensorrt_llm::runtime::SamplingConfig::noRepeatNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE">tensorrt_llm::runtime::SamplingConfig::normalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig18numReturnSequencesE">tensorrt_llm::runtime::SamplingConfig::numReturnSequences (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::runtime::SamplingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE">tensorrt_llm::runtime::SamplingConfig::OptVec (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig19originalTemperatureE">tensorrt_llm::runtime::SamplingConfig::originalTemperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14outputLogProbsE">tensorrt_llm::runtime::SamplingConfig::outputLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE">tensorrt_llm::runtime::SamplingConfig::presencePenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE">tensorrt_llm::runtime::SamplingConfig::randomSeed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE">tensorrt_llm::runtime::SamplingConfig::repetitionPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE10SizeType32">tensorrt_llm::runtime::SamplingConfig::SamplingConfig (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25ExternalDraftTokensConfigEEE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE">tensorrt_llm::runtime::SamplingConfig::temperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE">tensorrt_llm::runtime::SamplingConfig::topK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE">tensorrt_llm::runtime::SamplingConfig::topKMedusaHeads (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE">tensorrt_llm::runtime::SamplingConfig::topP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE">tensorrt_llm::runtime::SamplingConfig::topPDecay (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE">tensorrt_llm::runtime::SamplingConfig::topPMin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE">tensorrt_llm::runtime::SamplingConfig::topPResetIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig8validateEv">tensorrt_llm::runtime::SamplingConfig::validate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig11validateVecEbNSt6stringERK6OptVecI1TE1TNSt8optionalI1TEE">tensorrt_llm::runtime::SamplingConfig::validateVec (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig3VecE">tensorrt_llm::runtime::SamplingConfig::Vec (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10SizeType32E">tensorrt_llm::runtime::SizeType32 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10SizeType64E">tensorrt_llm::runtime::SizeType64 (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingModeE">tensorrt_llm::runtime::SpeculativeDecodingMode (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9allBitSetE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::allBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9anyBitSetE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::anyBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19DraftTokensExternalEv">tensorrt_llm::runtime::SpeculativeDecodingMode::DraftTokensExternal (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5EagleEv">tensorrt_llm::runtime::SpeculativeDecodingMode::Eagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19ExplicitDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::ExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode14hasDraftLogitsEv">tensorrt_llm::runtime::SpeculativeDecodingMode::hasDraftLogits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isDraftTokensExternalEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isDraftTokensExternal (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode7isEagleEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isEagle (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isExplicitDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isExplicitDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19isLookaheadDecodingEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isLookaheadDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode8isMedusaEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode6isNoneEv">tensorrt_llm::runtime::SpeculativeDecodingMode::isNone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kDraftTokensExternalE">tensorrt_llm::runtime::SpeculativeDecodingMode::kDraftTokensExternal (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6kEagleE">tensorrt_llm::runtime::SpeculativeDecodingMode::kEagle (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kExplicitDraftTokensE">tensorrt_llm::runtime::SpeculativeDecodingMode::kExplicitDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode18kLookaheadDecodingE">tensorrt_llm::runtime::SpeculativeDecodingMode::kLookaheadDecoding (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode7kMedusaE">tensorrt_llm::runtime::SpeculativeDecodingMode::kMedusa (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5kNoneE">tensorrt_llm::runtime::SpeculativeDecodingMode::kNone (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode17LookaheadDecodingEv">tensorrt_llm::runtime::SpeculativeDecodingMode::LookaheadDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6MedusaEv">tensorrt_llm::runtime::SpeculativeDecodingMode::Medusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6mStateE">tensorrt_llm::runtime::SpeculativeDecodingMode::mState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode20needsDecoderPrologueEv">tensorrt_llm::runtime::SpeculativeDecodingMode::needsDecoderPrologue (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18needsKVCacheRewindEv">tensorrt_llm::runtime::SpeculativeDecodingMode::needsKVCacheRewind (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode4NoneEv">tensorrt_llm::runtime::SpeculativeDecodingMode::None (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingModeeqERK23SpeculativeDecodingMode">tensorrt_llm::runtime::SpeculativeDecodingMode::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19predictsDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingMode::predictsDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21requiresAttentionMaskEv">tensorrt_llm::runtime::SpeculativeDecodingMode::requiresAttentionMask (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode23SpeculativeDecodingModeE14UnderlyingType">tensorrt_llm::runtime::SpeculativeDecodingMode::SpeculativeDecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode14UnderlyingTypeE">tensorrt_llm::runtime::SpeculativeDecodingMode::UnderlyingType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18updatesPositionIdsEv">tensorrt_llm::runtime::SpeculativeDecodingMode::updatesPositionIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19variableDraftLengthEv">tensorrt_llm::runtime::SpeculativeDecodingMode::variableDraftLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleE">tensorrt_llm::runtime::SpeculativeDecodingModule (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule21computeNumPackedMasksEv">tensorrt_llm::runtime::SpeculativeDecodingModule::computeNumPackedMasks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule25getMaxDecodingDraftTokensEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDecodingDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule20getMaxDecodingTokensEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDecodingTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule18getMaxDraftPathLenEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxDraftPathLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule14getMaxNumPathsEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxNumPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule13getMaxPathLenEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getMaxPathLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule17getNumPackedMasksEv">tensorrt_llm::runtime::SpeculativeDecodingModule::getNumPackedMasks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule23mMaxDecodingDraftTokensE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxDecodingDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule16mMaxDraftPathLenE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxDraftPathLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18mMaxNumPackedMasksE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxNumPackedMasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule12mMaxNumPathsE">tensorrt_llm::runtime::SpeculativeDecodingModule::mMaxNumPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleaSERK25SpeculativeDecodingModule">tensorrt_llm::runtime::SpeculativeDecodingModule::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18setMaxDraftPathLenE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxDraftPathLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule17setMaxDraftTokensE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule14setMaxNumPathsE10SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::setMaxNumPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleE10SizeType3210SizeType3210SizeType32">tensorrt_llm::runtime::SpeculativeDecodingModule::SpeculativeDecodingModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleERK25SpeculativeDecodingModule">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleD0Ev">tensorrt_llm::runtime::SpeculativeDecodingModule::~SpeculativeDecodingModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE">tensorrt_llm::runtime::StringPtrMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLoggerE">tensorrt_llm::runtime::TllmLogger (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv">tensorrt_llm::runtime::TllmLogger::getLevel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE">tensorrt_llm::runtime::TllmLogger::log (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity">tensorrt_llm::runtime::TllmLogger::setLevel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig">tensorrt_llm::runtime::to_string (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16TokenExtraIdTypeE">tensorrt_llm::runtime::TokenExtraIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE">tensorrt_llm::runtime::TokenIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE">tensorrt_llm::runtime::TRTDataType (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE">tensorrt_llm::runtime::TRTDataType&lt;bool&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE">tensorrt_llm::runtime::TRTDataType&lt;bool&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE">tensorrt_llm::runtime::TRTDataType&lt;float&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE">tensorrt_llm::runtime::TRTDataType&lt;float&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE">tensorrt_llm::runtime::TRTDataType&lt;half&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE">tensorrt_llm::runtime::TRTDataType&lt;half&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEEE">tensorrt_llm::runtime::TRTDataType&lt;kernels::FinishedState&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;kernels::FinishedState&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEEE">tensorrt_llm::runtime::TRTDataType&lt;kernels::KVCacheIndex&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;kernels::KVCacheIndex&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7runtime11RequestTypeEEE">tensorrt_llm::runtime::TRTDataType&lt;runtime::RequestType&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7runtime11RequestTypeEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;runtime::RequestType&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::kUnderlyingType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE">tensorrt_llm::runtime::TRTDataType&lt;void*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE">tensorrt_llm::runtime::TRTDataType&lt;void*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueTokenE">tensorrt_llm::runtime::UniqueToken (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11UniqueTokeneqERK11UniqueToken">tensorrt_llm::runtime::UniqueToken::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueToken12tokenExtraIdE">tensorrt_llm::runtime::UniqueToken::tokenExtraId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11UniqueToken7tokenIdE">tensorrt_llm::runtime::UniqueToken::tokenId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime5utilsE">tensorrt_llm::runtime::utils (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE">tensorrt_llm::runtime::utils::loadEngine (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16VecTokenExtraIdsE">tensorrt_llm::runtime::VecTokenExtraIds (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15VecUniqueTokensE">tensorrt_llm::runtime::VecUniqueTokens (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfigE">tensorrt_llm::runtime::WorldConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getContextParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getContextParallelGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getContextParallelismEv">tensorrt_llm::runtime::WorldConfig::getContextParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getContextParallelRankEv">tensorrt_llm::runtime::WorldConfig::getContextParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv">tensorrt_llm::runtime::WorldConfig::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getDeviceOfE10SizeType32">tensorrt_llm::runtime::WorldConfig::getDeviceOf (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv">tensorrt_llm::runtime::WorldConfig::getGpusPerGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv">tensorrt_llm::runtime::WorldConfig::getGpusPerNode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv">tensorrt_llm::runtime::WorldConfig::getLastRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig12getLocalRankEv">tensorrt_llm::runtime::WorldConfig::getLocalRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getNodeRankEv">tensorrt_llm::runtime::WorldConfig::getNodeRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig13getNodeRankOfE10SizeType32">tensorrt_llm::runtime::WorldConfig::getNodeRankOf (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv">tensorrt_llm::runtime::WorldConfig::getRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv">tensorrt_llm::runtime::WorldConfig::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getTensorParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig17isContextParallelEv">tensorrt_llm::runtime::WorldConfig::isContextParallel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isFirstContextParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstContextParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig25isFirstTensorParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstTensorParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv">tensorrt_llm::runtime::WorldConfig::isPipelineParallel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv">tensorrt_llm::runtime::WorldConfig::isTensorParallel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig19mContextParallelismE">tensorrt_llm::runtime::WorldConfig::mContextParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE">tensorrt_llm::runtime::WorldConfig::mDeviceIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::mGpusPerNode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE10SizeType32NSt8optionalI10SizeType32EENSt8optionalI10SizeType32EENSt8optionalI10SizeType32EERKNSt8optionalINSt6vectorI10SizeType32EEEE">tensorrt_llm::runtime::WorldConfig::mpi (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE">tensorrt_llm::runtime::WorldConfig::mPipelineParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE">tensorrt_llm::runtime::WorldConfig::mRank (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE">tensorrt_llm::runtime::WorldConfig::mTensorParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14validMpiConfigEv">tensorrt_llm::runtime::WorldConfig::validMpiConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt8optionalINSt6vectorI10SizeType32EEEE">tensorrt_llm::runtime::WorldConfig::WorldConfig (C++ function)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.to_dict">to_dict() (tensorrt_llm.llmapi.BuildConfig method)</a>

      <ul>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.to_dict">(tensorrt_llm.llmapi.CalibConfig method)</a>
</li>
        <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.to_dict">(tensorrt_llm.llmapi.QuantConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMConfig.to_dict">(tensorrt_llm.models.ChatGLMConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.CogVLMConfig.to_dict">(tensorrt_llm.models.CogVLMConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DbrxConfig.to_dict">(tensorrt_llm.models.DbrxConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconConfig.to_dict">(tensorrt_llm.models.FalconConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.to_dict">(tensorrt_llm.models.GemmaConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTConfig.to_dict">(tensorrt_llm.models.GPTConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJConfig.to_dict">(tensorrt_llm.models.GPTJConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAConfig.to_dict">(tensorrt_llm.models.LLaMAConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaConfig.to_dict">(tensorrt_llm.models.MedusaConfig method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_dict">(tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_json_file">to_json_file() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_layer_quant_config">to_layer_quant_config() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.to_legacy_setting">to_legacy_setting() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.tokenizer">tokenizer (tensorrt_llm.llmapi.LLM property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.tokenizer_image_token">tokenizer_image_token() (tensorrt_llm.runtime.MultimodalModelRunner static method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CalibConfig.tokenizer_max_seq_length">tokenizer_max_seq_length (tensorrt_llm.llmapi.CalibConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.tokens_per_block">tokens_per_block (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.tokens_per_block">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_k">top_k (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_k">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p">top_p (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_decay">top_p_decay (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_decay">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_min">top_p_min (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_min">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.top_p_reset_ids">top_p_reset_ids (tensorrt_llm.llmapi.SamplingParams attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_reset_ids">(tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.topk">topk() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.tp_split_dim">tp_split_dim() (tensorrt_llm.layers.linear.Linear class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.tp_split_dim">(tensorrt_llm.layers.linear.LinearBase class method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.tp_split_dim">(tensorrt_llm.layers.linear.RowLinear class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.transpose">transpose() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.transpose">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li>
    trtllm-serve command line option

      <ul>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-host">--host</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-kv_cache_free_gpu_memory_fraction">--kv_cache_free_gpu_memory_fraction</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_batch_size">--max_batch_size</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_beam_width">--max_beam_width</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_num_tokens">--max_num_tokens</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-max_seq_len">--max_seq_len</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-port">--port</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-pp_size">--pp_size</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-tokenizer">--tokenizer</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-tp_size">--tp_size</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-trust_remote_code">--trust_remote_code</a>
</li>
        <li><a href="commands/trtllm-serve.html#cmdoption-trtllm-serve-arg-MODEL">MODEL</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.trtllm_modules_to_hf_modules">trtllm_modules_to_hf_modules (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.truncate_prompt_tokens">truncate_prompt_tokens (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.TWOSHOT">TWOSHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "U": one link per documented object, split across two 33%-wide cells of a single-row table; a nested <ul> lists further objects that share the parent entry's name. -->
<h2 id="U">U</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.UB">UB (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unary">unary() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unbind">unbind() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.unbind">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DiT.unpatchify">unpatchify() (tensorrt_llm.models.DiT method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unsqueeze">unsqueeze() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.update">update() (tensorrt_llm.llmapi.BuildConfig method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.update">(tensorrt_llm.runtime.SamplingConfig method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.update_from_dict">update_from_dict() (tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.update_kv_cache_type">update_kv_cache_type() (tensorrt_llm.llmapi.BuildConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.update_output_ids_by_offset">update_output_ids_by_offset() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceParams.update_strategy">update_strategy() (tensorrt_llm.functional.AllReduceParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.use_beam_hyps">use_beam_hyps (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.SamplingParams.use_beam_search">use_beam_search (tensorrt_llm.llmapi.SamplingParams attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_fused_mlp">use_fused_mlp (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin">use_gpt_attention_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_kv_cache">use_kv_cache (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.use_lora">use_lora() (tensorrt_llm.models.DecoderModel method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.use_lora">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.use_lora">(tensorrt_llm.models.GPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.use_lora">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MLLaMAModel.use_lora">(tensorrt_llm.models.MLLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.Phi3ForCausalLM.use_lora">(tensorrt_llm.models.Phi3ForCausalLM method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_lora_plugin">use_lora_plugin (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.use_lora_plugin">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_mamba_conv1d_plugin">use_mamba_conv1d_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig.USE_MEMCPY">USE_MEMCPY (tensorrt_llm.functional.AllReduceConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.use_meta_recipe">use_meta_recipe (tensorrt_llm.llmapi.QuantConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_mrope">use_mrope (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantConfig.use_plugin_sq">use_plugin_sq (tensorrt_llm.llmapi.QuantConfig property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.use_prompt_tuning">use_prompt_tuning() (tensorrt_llm.models.EncoderModel method)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_refit">use_refit (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.use_strip_plan">use_strip_plan (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "V": one link per documented object, split across two 33%-wide cells of a single-row table; a nested <ul> lists further objects that share the parent entry's name. -->
<h2 id="V">V</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.CapacitySchedulerPolicy.value">value (tensorrt_llm.llmapi.CapacitySchedulerPolicy property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaConfig.VERBATIM">VERBATIM (tensorrt_llm.models.GemmaConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MultimodalModelRunner.video_preprocess">video_preprocess() (tensorrt_llm.runtime.MultimodalModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.view">view() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.view">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.visualize_network">visualize_network (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.vocab_size">vocab_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.vocab_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size_padded">vocab_size_padded (tensorrt_llm.runtime.ModelRunner property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size_padded">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<!-- Index section "W": one link per documented object, split across two 33%-wide cells of a single-row table; a nested <ul> lists further objects that share the parent entry's name. -->
<h2 id="W">W</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16">W4A16 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16_AWQ">W4A16_AWQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A16_GPTQ">W4A16_GPTQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_AWQ">W4A8_AWQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_CHANNEL">W4A8_QSERVE_PER_CHANNEL (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W4A8_QSERVE_PER_GROUP">W4A8_QSERVE_PER_GROUP (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A16">W8A16 (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A16_GPTQ">W8A16_GPTQ (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL">W8A8_SQ_PER_CHANNEL (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN">W8A8_SQ_PER_CHANNEL_PER_TENSOR_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN">W8A8_SQ_PER_CHANNEL_PER_TOKEN_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN">W8A8_SQ_PER_TENSOR_PER_TOKEN_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.QuantAlgo.W8A8_SQ_PER_TENSOR_PLUGIN">W8A8_SQ_PER_TENSOR_PLUGIN (tensorrt_llm.llmapi.QuantAlgo attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.weight_is_kn">weight_is_kn() (tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.DeepseekV2Attention.weight_loader">weight_loader() (tensorrt_llm.layers.attention.DeepseekV2Attention method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.weight_loader">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.LinearBase.weight_loader">(tensorrt_llm.layers.linear.LinearBase method)</a>
</li>
      </ul></li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.weight_sparsity">weight_sparsity (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.BuildConfig.weight_streaming">weight_streaming (tensorrt_llm.llmapi.BuildConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.where">where() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder">WhisperEncoder (class in tensorrt_llm.models)</a>
</li>
      <li><a href="llm-api/reference.html#tensorrt_llm.llmapi.LLM.workspace">workspace (tensorrt_llm.llmapi.LLM property)</a>
</li>
  </ul></td>
</tr></table>

<!-- Index section "Y": a single-row table with one 33%-wide cell; the nested <ul> lists a second object that shares the parent entry's name. -->
<h2 id="Y">Y</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.yarn">yarn (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.yarn">(tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>


           </div>
          </div>
          <footer>
  <!-- Page footer: NVIDIA copyright notice followed by a row of legal and policy links. Every link opens in a new tab (target="_blank") and carries rel="noopener". -->

  <hr/>

  <div role="contentinfo">
<!-- NOTE(review): a stray Python repr of a Jinja2 BlockReference object used to be rendered at this spot. Browsers parsed it as an unknown start tag that was never closed, so the rest of the footer ended up nested inside a bogus element. It has been removed here. Presumably the footer template references a block without calling it (for example "super" instead of "super()"); confirm and fix the template so regenerated pages do not reintroduce it. -->

<div class="footer">
    <p>
        Copyright © 2024 NVIDIA Corporation
    </p>
    <p>
        <a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-policy/" target="_blank" rel="noopener"
            data-cms-ai="0">Privacy Policy</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/" target="_blank" rel="noopener"
            data-cms-ai="0">Manage My Privacy</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/preferences/start/" target="_blank" rel="noopener"
            data-cms-ai="0">Do Not Sell or Share My Data</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/terms-of-service/" target="_blank"
            rel="noopener" data-cms-ai="0">Terms of Service</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/accessibility/" target="_blank" rel="noopener"
            data-cms-ai="0">Accessibility</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/about-nvidia/company-policies/" target="_blank"
            rel="noopener" data-cms-ai="0">Corporate Policies</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/product-security/" target="_blank" rel="noopener"
            data-cms-ai="0">Product Security</a> |
        <a class="Link" href="https://www.nvidia.com/en-us/contact/" target="_blank" rel="noopener"
            data-cms-ai="0">Contact</a>
    </p>
</div>


  </div>


</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      // Start the theme's navigation behaviour once jQuery reports the DOM as ready.
      // NOTE(review): the `true` argument presumably switches on sticky navigation;
      // confirm against the sphinx_rtd_theme theme.js shipped with this build.
      jQuery(function () {
          var navigation = SphinxRtdTheme.Navigation;
          navigation.enable(true);
      });
  </script>

</body>
</html>