<!-- TensorRT-LLMs/genindex.html -->

<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Index &mdash; tensorrt_llm documentation</title>
      <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
      <link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />


  <!--[if lt IE 9]>
    <script src="_static/js/html5shiv.min.js"></script>
  <![endif]-->

        <script src="_static/jquery.js?v=5d32c60e"></script>
        <script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
        <script src="_static/documentation_options.js?v=5929fcd5"></script>
        <script src="_static/doctools.js?v=888ff710"></script>
        <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
    <script src="_static/js/theme.js"></script>
    <link rel="index" title="Index" href="#" />
    <link rel="search" title="Search" href="search.html" />
</head>

<body class="wy-body-for-nav">
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >


          <a href="index.html" class="icon icon-home">
            tensorrt_llm
          </a>
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <p class="caption" role="heading" aria-level="2"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="quick-start-guide.html">Quick Start Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release Notes</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Installation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="installation/linux.html">Installing on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/build-from-source-linux.html">Building from Source Code on Linux</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/windows.html">Installing on Windows</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation/build-from-source-windows.html">Building from Source Code on Windows</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Architecture</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="architecture/overview.html">TensorRT-LLM Architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html">Model Definition</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#compilation">Compilation</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#runtime">Runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/core-concepts.html#multi-gpu-and-multi-node-support">Multi-GPU and Multi-Node Support</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/checkpoint.html">TensorRT-LLM Checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/workflow.html">TensorRT-LLM Build Workflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="architecture/add-model.html">Adding a Model</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Advanced</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="advanced/gpt-attention.html">Multi-Head, Multi-Query, and Group-Query Attention</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/gpt-runtime.html">C++ GPT Runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/graph-rewriting.html">Graph Rewriting Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/batch-manager.html">The Batch Manager in TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/inference-request.html">Inference Request</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/lora.html">Run gpt-2b + LoRA using GptManager / cpp runtime</a></li>
<li class="toctree-l1"><a class="reference internal" href="advanced/expert-parallelism.html">Expert Parallelism in TensorRT-LLM</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Performance</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-overview.html">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-best-practices.html">Best Practices for Tuning the Performance of TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="performance/perf-analysis.html">Performance Analysis</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Reference</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="reference/troubleshooting.html">Troubleshooting</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/support-matrix.html">Support Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/precision.html">Numerical Precision</a></li>
<li class="toctree-l1"><a class="reference internal" href="reference/memory.html">Memory Usage of TensorRT-LLM</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">C++ API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="_cpp_gen/runtime.html">Runtime</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Python API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.layers.html">Layers</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.functional.html">Functionals</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.models.html">Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.plugin.html">Plugin</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="python-api/tensorrt_llm.runtime.html">Runtime</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Blogs</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="blogs/H100vsA100.html">H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/H200launch.html">H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/Falcon180B-H200.html">Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/quantization-in-TRT-LLM.html">Speed up inference with SOTA quantization techniques in TRT-LLM</a></li>
<li class="toctree-l1"><a class="reference internal" href="blogs/XQA-kernel.html">New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">tensorrt_llm</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">
      <li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
      <li class="breadcrumb-item active">Index</li>
      <li class="wy-breadcrumbs-aside">
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">


<h1 id="index">Index</h1>

<div class="genindex-jumpbox">
 <a href="#A"><strong>A</strong></a>
 | <a href="#B"><strong>B</strong></a>
 | <a href="#C"><strong>C</strong></a>
 | <a href="#D"><strong>D</strong></a>
 | <a href="#E"><strong>E</strong></a>
 | <a href="#F"><strong>F</strong></a>
 | <a href="#G"><strong>G</strong></a>
 | <a href="#H"><strong>H</strong></a>
 | <a href="#I"><strong>I</strong></a>
 | <a href="#K"><strong>K</strong></a>
 | <a href="#L"><strong>L</strong></a>
 | <a href="#M"><strong>M</strong></a>
 | <a href="#N"><strong>N</strong></a>
 | <a href="#O"><strong>O</strong></a>
 | <a href="#P"><strong>P</strong></a>
 | <a href="#Q"><strong>Q</strong></a>
 | <a href="#R"><strong>R</strong></a>
 | <a href="#S"><strong>S</strong></a>
 | <a href="#T"><strong>T</strong></a>
 | <a href="#U"><strong>U</strong></a>
 | <a href="#V"><strong>V</strong></a>
 | <a href="#W"><strong>W</strong></a>

</div>
<h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.abs">abs() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.abs">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.activation">activation() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.add">add() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.add_sequence">add_sequence() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi">alibi (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale">alibi_with_scale (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allgather">allgather() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.allreduce">allreduce() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig">AllReduceConfig (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy">AllReduceStrategy (class in tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb">apply_rotary_pos_emb() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm">apply_rotary_pos_emb_chatglm() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.arange">arange() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.argmax">argmax() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.assertion">assertion() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention">Attention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType">AttentionMaskType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams">AttentionParams (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.AUTO">AUTO (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.avg_pool2d">avg_pool2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d">AvgPool2d (class in tensorrt_llm.layers.pooling)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="B">B</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.bad_words_list">bad_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BaichuanForCausalLM">BaichuanForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.batch_size">batch_size (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate">beam_search_diversity_rate (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.bert_attention">bert_attention() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention">BertAttention (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering">BertForQuestionAnswering (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification">BertForSequenceClassification (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel">BertModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectional">bidirectional (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.bidirectionalglm">bidirectionalglm (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomForCausalLM">BloomForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel">BloomModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.broadcast_helper">broadcast_helper() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.buffer_allocated">buffer_allocated (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="C">C</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast">Cast (class in tensorrt_llm.layers.cast)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cast">cast() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.cast">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.causal">causal (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.chatglm">chatglm (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM">ChatGLMForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ChatGLMGenerationSession">ChatGLMGenerationSession (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel">ChatGLMModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.check_config">check_config() (tensorrt_llm.models.ChatGLMForCausalLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM.check_config">(tensorrt_llm.models.FalconForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.check_config">(tensorrt_llm.models.GemmaForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.check_config">(tensorrt_llm.models.GPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM.check_config">(tensorrt_llm.models.GPTJForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.check_config">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM.check_config">(tensorrt_llm.models.MPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM.check_config">(tensorrt_llm.models.OPTForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.check_config">(tensorrt_llm.models.PhiForCausalLM method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.check_config">(tensorrt_llm.models.PretrainedModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.QWenForCausalLM.check_config">(tensorrt_llm.models.QWenForCausalLM method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.choices">choices() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.chunk">chunk() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.clip">clip() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.ColumnLinear">ColumnLinear (in module tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.compute_relative_bias">compute_relative_bias() (in module tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.concat">concat() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conditional">conditional() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant">constant() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.constant_to_tensor_">constant_to_tensor_() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.context">context (tensorrt_llm.runtime.Session property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d">Conv1d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv1d">conv1d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d">Conv2d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv2d">conv2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.conv_transpose2d">conv_transpose2d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM.convert_hf_checkpoint">convert_hf_checkpoint() (tensorrt_llm.models.PhiForCausalLM class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d">ConvTranspose2d (class in tensorrt_llm.layers.conv)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cos">cos() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils.create_sinusoidal_positions">create_sinusoidal_positions() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cross_attention">cross_attention (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.cross_attention">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_graph_mode">cuda_graph_mode (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.cuda_stream_guard">cuda_stream_guard() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.cumsum">cumsum() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="D">D</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_mode">debug_mode (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save">debug_tensors_to_save (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode">decode() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_batch">decode_batch() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_regular">decode_regular() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.decode_stream">decode_stream() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel">DecoderModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config">default_plugin_config() (tensorrt_llm.models.LLaMAForCausalLM method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.device">device (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.DimRange">DimRange (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.div">div() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.dtype">dtype (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dtype">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.dtype">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.dtype">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.dtype">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.dtype">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.dump_debug_buffers">dump_debug_buffers() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.dynamic">dynamic (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="E">E</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.early_stop_criteria">early_stop_criteria() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.early_stopping">early_stopping (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.einsum">einsum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.elementwise_binary">elementwise_binary() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding">Embedding (class in tensorrt_llm.layers.embedding)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.embedding">embedding() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel">EncoderModel (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.end_id">end_id (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.engine">engine (tensorrt_llm.runtime.Session property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.eq">eq() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.exp">exp() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand">expand() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims">expand_dims() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_dims_like">expand_dims_like() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.expand_mask">expand_mask() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="F">F</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconForCausalLM">FalconForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel">FalconModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list">fill_none_tensor_list() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.filter_medusa_logits">filter_medusa_logits() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.finalize_decoder">finalize_decoder() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.find_best_medusa_path">find_best_medusa_path() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.first_layer">first_layer (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.flip">flip() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish.forward">forward() (tensorrt_llm.layers.activation.Mish method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.Attention.forward">(tensorrt_llm.layers.attention.Attention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.BertAttention.forward">(tensorrt_llm.layers.attention.BertAttention method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.cast.Cast.forward">(tensorrt_llm.layers.cast.Cast method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv1d.forward">(tensorrt_llm.layers.conv.Conv1d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.Conv2d.forward">(tensorrt_llm.layers.conv.Conv2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.conv.ConvTranspose2d.forward">(tensorrt_llm.layers.conv.ConvTranspose2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.forward">(tensorrt_llm.layers.embedding.Embedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward">(tensorrt_llm.layers.embedding.PromptTuningEmbedding method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.forward">(tensorrt_llm.layers.linear.Linear method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.forward">(tensorrt_llm.layers.linear.RowLinear method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP.forward">(tensorrt_llm.layers.mlp.FusedGatedMLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP.forward">(tensorrt_llm.layers.mlp.GatedMLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP.forward">(tensorrt_llm.layers.mlp.MLP method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm.forward">(tensorrt_llm.layers.normalization.GroupNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm.forward">(tensorrt_llm.layers.normalization.LayerNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm.forward">(tensorrt_llm.layers.normalization.RmsNorm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.pooling.AvgPool2d.forward">(tensorrt_llm.layers.pooling.AvgPool2d method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForQuestionAnswering.forward">(tensorrt_llm.models.BertForQuestionAnswering method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertForSequenceClassification.forward">(tensorrt_llm.models.BertForSequenceClassification method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BertModel.forward">(tensorrt_llm.models.BertModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.BloomModel.forward">(tensorrt_llm.models.BloomModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMModel.forward">(tensorrt_llm.models.ChatGLMModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.forward">(tensorrt_llm.models.DecoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.forward">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.FalconModel.forward">(tensorrt_llm.models.FalconModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel.forward">(tensorrt_llm.models.GPTJModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel.forward">(tensorrt_llm.models.GPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel.forward">(tensorrt_llm.models.GPTNeoXModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel.forward">(tensorrt_llm.models.LLaMAModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaLMHeadModel.forward">(tensorrt_llm.models.MambaLMHeadModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.forward">(tensorrt_llm.models.MedusaForCausalLm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel.forward">(tensorrt_llm.models.MPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel.forward">(tensorrt_llm.models.OPTModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel.forward">(tensorrt_llm.models.PhiModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.forward">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.frequency_penalty">frequency_penalty (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_checkpoint">from_checkpoint() (tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.from_config">from_config() (tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_dict">from_dict() (tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_dir">from_dir() (tensorrt_llm.runtime.ModelRunner class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.from_dir">(tensorrt_llm.runtime.ModelRunnerCpp class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.from_engine">from_engine() (tensorrt_llm.runtime.ModelRunner class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_engine">(tensorrt_llm.runtime.Session static method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM.from_hugging_face">from_hugging_face() (tensorrt_llm.models.GemmaForCausalLM class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face">(tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.from_json_file">from_json_file() (tensorrt_llm.models.PretrainedConfig class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.from_meta_ckpt">from_meta_ckpt() (tensorrt_llm.models.LLaMAForCausalLM class method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.from_serialized_engine">from_serialized_engine() (tensorrt_llm.runtime.Session static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.from_string">from_string() (tensorrt_llm.functional.PositionEmbeddingType static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.FusedGatedMLP">FusedGatedMLP (class in tensorrt_llm.layers.mlp)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.FusedGatedMLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<h2 id="G">G</h2>
<!-- Index entries beginning with "G". The table below only places the two
     entry lists side by side; it carries no tabular data, so it is marked
     role="presentation" to keep assistive technology from announcing it as a
     data table. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.GatedMLP">GatedMLP (class in tensorrt_llm.layers.mlp)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.GatedMLP">(tensorrt_llm.functional.MLPType attribute)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather">gather() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_context_logits">gather_context_logits (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_context_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_context_logits">(tensorrt_llm.runtime.ModelRunner property)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_context_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.gather_generation_logits">gather_generation_logits (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gather_generation_logits">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.gather_generation_logits">(tensorrt_llm.runtime.ModelRunner property)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.gather_generation_logits">(tensorrt_llm.runtime.ModelRunnerCpp property)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gather_last_token_logits">gather_last_token_logits() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.geglu">geglu() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gelu">gelu() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GemmaForCausalLM">GemmaForCausalLM (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.generate">generate() (tensorrt_llm.runtime.ModelRunner method)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.generate">(tensorrt_llm.runtime.ModelRunnerCpp method)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate">(tensorrt_llm.runtime.QWenForCausalLMGenerationSession method)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_biases">generate_alibi_biases() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.generate_alibi_slopes">generate_alibi_slopes() (in module tensorrt_llm.functional)</a></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence">GenerationSequence (class in tensorrt_llm.runtime)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession">GenerationSession (class in tensorrt_llm.runtime)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_batch_idx">get_batch_idx() (tensorrt_llm.runtime.GenerationSequence method)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.get_block_pointers">get_block_pointers() (tensorrt_llm.runtime.KVCacheManager method)</a></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value">get_first_past_key_value() (tensorrt_llm.layers.attention.KeyValueCacheParams method)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens">get_next_medusa_tokens() (tensorrt_llm.runtime.GenerationSession method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_parent">get_parent() (tensorrt_llm.functional.Tensor method)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSequence.get_seq_idx">get_seq_idx() (tensorrt_llm.runtime.GenerationSequence method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.get_users">get_users() (tensorrt_llm.functional.Tensor method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gpt_attention">gpt_attention() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin">gpt_attention_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM">GPTForCausalLM (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJForCausalLM">GPTJForCausalLM (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTJModel">GPTJModel (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTModel">GPTModel (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXForCausalLM">GPTNeoXForCausalLM (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTNeoXModel">GPTNeoXModel (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.group_norm">group_norm() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.GroupNorm">GroupNorm (class in tensorrt_llm.layers.normalization)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.GroupNorm">(tensorrt_llm.functional.LayerNormType attribute)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.gt">gt() (in module tensorrt_llm.functional)</a></li>
  </ul></td>
</tr></table>

<h2 id="H">H</h2>
<!-- Index entries beginning with "H". The table below only places the two
     entry lists side by side; it carries no tabular data, so it is marked
     role="presentation" to keep assistive technology from announcing it as a
     data table. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.handle_per_step">handle_per_step() (tensorrt_llm.runtime.GenerationSession method)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_position_embedding">has_position_embedding (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_position_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.has_token_type_embedding">has_token_type_embedding (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.has_token_type_embedding">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
        </ul>
      </li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.head_size">head_size (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.head_size">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.hidden_size">hidden_size (tensorrt_llm.runtime.GenerationSession property)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.hidden_size">(tensorrt_llm.runtime.ModelConfig attribute)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.hidden_size">(tensorrt_llm.runtime.ModelRunner property)</a></li>
          <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.hidden_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a></li>
        </ul>
      </li>
  </ul></td>
</tr></table>

<h2 id="I">I</h2>
<!-- Index entries beginning with "I". The table below only places the two
     entry lists side by side; it carries no tabular data, so it is marked
     role="presentation" to keep assistive technology from announcing it as a
     data table. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.identity">identity() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.index_select">index_select() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.infer_shapes">infer_shapes() (tensorrt_llm.runtime.Session method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.interpolate">interpolate() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_alibi">is_alibi() (tensorrt_llm.functional.PositionEmbeddingType method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_dynamic">is_dynamic() (tensorrt_llm.functional.Tensor method)</a></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.is_gated_activation">is_gated_activation() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.is_medusa_mode">is_medusa_mode (tensorrt_llm.runtime.GenerationSession property)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.is_rope">is_rope() (tensorrt_llm.functional.PositionEmbeddingType method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.is_trt_wrapper">is_trt_wrapper() (tensorrt_llm.functional.Tensor method)</a></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid">is_valid() (tensorrt_llm.layers.attention.AttentionParams method)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid">(tensorrt_llm.layers.attention.KeyValueCacheParams method)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn">is_valid_cross_attn() (tensorrt_llm.layers.attention.AttentionParams method)</a></li>
  </ul></td>
</tr></table>

<h2 id="K">K</h2>
<!-- Index entries beginning with "K". The table below only places the two
     entry lists side by side; it carries no tabular data, so it is marked
     role="presentation" to keep assistive technology from announcing it as a
     data table. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.KeyValueCacheParams">KeyValueCacheParams (class in tensorrt_llm.layers.attention)</a></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager">KVCacheManager (class in tensorrt_llm.runtime)</a></li>
  </ul></td>
</tr></table>

<h2 id="L">L</h2>
<!-- Index entries beginning with "L". The table below only places the two
     entry lists side by side; it carries no tabular data, so it is marked
     role="presentation" to keep assistive technology from announcing it as a
     data table. -->
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.last_layer">last_layer (tensorrt_llm.runtime.GenerationSession property)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.layer_norm">layer_norm() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.LayerNorm">LayerNorm (class in tensorrt_llm.layers.normalization)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.LayerNorm">(tensorrt_llm.functional.LayerNormType attribute)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType">LayerNormPositionType (class in tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType">LayerNormType (class in tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.learned_absolute">learned_absolute (tensorrt_llm.functional.PositionEmbeddingType attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.length_penalty">length_penalty (tensorrt_llm.runtime.SamplingConfig attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear">Linear (class in tensorrt_llm.layers.linear)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.linear">linear (tensorrt_llm.functional.RotaryScalingType attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM">LLaMAForCausalLM (class in tensorrt_llm.models)</a></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAModel">LLaMAModel (class in tensorrt_llm.models)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.load">load() (tensorrt_llm.models.PretrainedModel method)</a></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.load_partial_weights">load_partial_weights() (tensorrt_llm.models.PretrainedModel method)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.location">location (tensorrt_llm.functional.Tensor property)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.log">log() (in module tensorrt_llm.functional)</a>
        <ul>
          <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.log">(tensorrt_llm.functional.Tensor method)</a></li>
        </ul>
      </li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessor">LogitsProcessor (class in tensorrt_llm.runtime)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.LogitsProcessorList">LogitsProcessorList (class in tensorrt_llm.runtime)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_plugin">lora_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lora_plugin">lora_plugin() (in module tensorrt_llm.functional)</a></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.lora_target_modules">lora_target_modules (tensorrt_llm.runtime.ModelConfig attribute)</a></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.lt">lt() (in module tensorrt_llm.functional)</a></li>
  </ul></td>
</tr></table>

<h2 id="M">M</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.make_causal_mask">make_causal_mask() (in module tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mamba_conv1d">mamba_conv1d() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_conv1d_plugin">mamba_conv1d_plugin (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_conv">mamba_d_conv (tensorrt_llm.runtime.MambaLMHeadModelGenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_d_conv">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_d_state">mamba_d_state (tensorrt_llm.runtime.MambaLMHeadModelGenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_d_state">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.mamba_expand">mamba_expand (tensorrt_llm.runtime.MambaLMHeadModelGenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.mamba_expand">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaLMHeadModel">MambaLMHeadModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MambaLMHeadModelGenerationSession">MambaLMHeadModelGenerationSession (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.mapping">mapping (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mark_output">mark_output() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_scatter">masked_scatter() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.masked_select">masked_select() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.matmul">matmul() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.max">max() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.max">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_attention_window_size">max_attention_window_size (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_batch_size">max_batch_size (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_beam_width">max_beam_width (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_medusa_tokens">max_medusa_tokens (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_medusa_tokens">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.max_new_tokens">max_new_tokens (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size">max_prompt_embedding_table_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_prompt_embedding_table_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.max_sequence_length">max_sequence_length (tensorrt_llm.runtime.ModelRunner property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.max_sequence_length">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.maximum">maximum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mean">mean() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.mean">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_paths">medusa_paths (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_position_offsets">medusa_position_offsets (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_temperature">medusa_temperature (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_topks">medusa_topks (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.medusa_tree_ids">medusa_tree_ids (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm">MedusaForCausalLm (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.min_length">min_length (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.minimum">minimum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.activation.Mish">Mish (class in tensorrt_llm.layers.activation)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.mlp.MLP">MLP (class in tensorrt_llm.layers.mlp)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType.MLP">(tensorrt_llm.functional.MLPType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.MLPType">MLPType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.model_name">model_name (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig">ModelConfig (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner">ModelRunner (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp">ModelRunnerCpp (class in tensorrt_llm.runtime)</a>
</li>
      <li>
    module

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm">tensorrt_llm</a>, <a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">tensorrt_llm.functional</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">tensorrt_llm.layers.activation</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">tensorrt_llm.layers.attention</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">tensorrt_llm.layers.cast</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">tensorrt_llm.layers.conv</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">tensorrt_llm.layers.embedding</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">tensorrt_llm.layers.linear</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">tensorrt_llm.layers.mlp</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">tensorrt_llm.layers.normalization</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">tensorrt_llm.layers.pooling</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">tensorrt_llm.models</a>
</li>
        <li><a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">tensorrt_llm.plugin</a>
</li>
        <li><a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">tensorrt_llm.quantization</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">tensorrt_llm.runtime</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTForCausalLM">MPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MPTModel">MPTModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.mul">mul() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.multiply_gather">multiply_gather() (tensorrt_llm.layers.linear.Linear method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.multiply_reduce">multiply_reduce() (tensorrt_llm.layers.linear.RowLinear method)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="N">N</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.name">name (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.name">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.NCCL">NCCL (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.ndim">ndim() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.network">network (tensorrt_llm.functional.Tensor property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids">next_medusa_input_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.non_gated_version">non_gated_version() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType.none">none (tensorrt_llm.functional.RotaryScalingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.num_beams">num_beams (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_heads">num_heads (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_heads">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_heads">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_heads_kv">num_heads_kv (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_kv_heads">num_kv_heads (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_layers">num_layers (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_layers">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.num_layers">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.num_layers">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_medusa_heads">num_medusa_heads (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.num_medusa_heads">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.num_medusa_tokens">num_medusa_tokens (tensorrt_llm.runtime.GenerationSession attribute)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv48nvinfer1">nvinfer1 (C++ type)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="O">O</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.ONESHOT">ONESHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_and">op_and() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.op_or">op_or() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTForCausalLM">OPTForCausalLM (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.OPTModel">OPTModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.outer">outer() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs">output_cum_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_log_probs">output_log_probs (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.output_sequence_lengths">output_sequence_lengths (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="P">P</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.pad_id">pad_id (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AttentionMaskType.padding">padding (tensorrt_llm.functional.AttentionMaskType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_kv_cache">paged_kv_cache (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.paged_kv_cache">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.paged_state">paged_state (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.paged_state">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.ParallelLMHead">ParallelLMHead (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.permute">permute() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.permute">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiForCausalLM">PhiForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PhiModel">PhiModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig">PluginConfig (class in tensorrt_llm.plugin)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType">PositionEmbeddingType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.post_layernorm">post_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.pow">pow() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids">pp_communicate_final_output_ids() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens">pp_communicate_new_tokens() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormPositionType.pre_layernorm">pre_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs">prepare_inputs() (tensorrt_llm.models.ChatGLMForCausalLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.DecoderModel.prepare_inputs">(tensorrt_llm.models.DecoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.EncoderModel.prepare_inputs">(tensorrt_llm.models.EncoderModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MambaLMHeadModel.prepare_inputs">(tensorrt_llm.models.MambaLMHeadModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.MedusaForCausalLm.prepare_inputs">(tensorrt_llm.models.MedusaForCausalLm method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.prepare_inputs">(tensorrt_llm.models.PretrainedModel method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder.prepare_inputs">(tensorrt_llm.models.WhisperEncoder method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.presence_penalty">presence_penalty (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig">PretrainedConfig (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel">PretrainedModel (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.process_logits_for_medusa_mode">process_logits_for_medusa_mode() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.PromptTuningEmbedding">PromptTuningEmbedding (class in tensorrt_llm.layers.embedding)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig.PUSH_MODE">PUSH_MODE (tensorrt_llm.functional.AllReduceConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="Q">Q</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.QKVColumnLinear">QKVColumnLinear (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.quant_mode">quant_mode (tensorrt_llm.models.PretrainedConfig property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.quant_mode">(tensorrt_llm.runtime.GenerationSession property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.quant_mode">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantAlgo">QuantAlgo (class in tensorrt_llm.quantization)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.quantize">quantize() (tensorrt_llm.models.LLaMAForCausalLM class method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.quantize">(tensorrt_llm.models.PretrainedModel class method)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.quantize_and_export">quantize_and_export() (in module tensorrt_llm.quantization)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.quantize_model">quantize_model() (in module tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.quantization.html#tensorrt_llm.quantization.QuantMode">QuantMode (class in tensorrt_llm.quantization)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.QWenForCausalLM">QWenForCausalLM (class in tensorrt_llm.models)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.QWenForCausalLMGenerationSession">QWenForCausalLMGenerationSession (class in tensorrt_llm.runtime)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="R">R</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.random_seed">random_seed (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.rank">rank() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.recv">recv() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.relative">relative (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.release">release() (tensorrt_llm.models.PretrainedModel method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.relu">relu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.remove_input_padding">remove_input_padding (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.remove_input_padding">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.remove_input_padding">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.remove_input_padding">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.repeat_interleave">repeat_interleave() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.repetition_penalty">repetition_penalty (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.replace_all_uses_with">replace_all_uses_with() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.return_dict">return_dict (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.rms_norm">rms_norm() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.normalization.RmsNorm">RmsNorm (class in tensorrt_llm.layers.normalization)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.LayerNormType.RmsNorm">(tensorrt_llm.functional.LayerNormType attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox">rope_gpt_neox (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.PositionEmbeddingType.rope_gptj">rope_gptj (tensorrt_llm.functional.PositionEmbeddingType attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils">RopeEmbeddingUtils (class in tensorrt_llm.layers.attention)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.RotaryScalingType">RotaryScalingType (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_every_two">rotate_every_two() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.attention.RopeEmbeddingUtils.rotate_half">rotate_half() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.round">round() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear">RowLinear (class in tensorrt_llm.layers.linear)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.run">run() (tensorrt_llm.runtime.Session method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.runtime">runtime (tensorrt_llm.runtime.GenerationSession attribute)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.runtime">(tensorrt_llm.runtime.Session property)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<h2 id="S">S</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig">SamplingConfig (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedModel.save_checkpoint">save_checkpoint() (tensorrt_llm.models.PretrainedModel method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.select">select() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.selective_scan">selective_scan() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.send">send() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.serialize_engine">serialize_engine() (tensorrt_llm.runtime.ModelRunner method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session">Session (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#c.SET_FROM_OPTIONAL">SET_FROM_OPTIONAL (C macro)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_if_not_exist">set_if_not_exist() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.set_rank">set_rank() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.Session.set_shapes">set_shapes() (tensorrt_llm.runtime.Session method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.setup">setup() (tensorrt_llm.runtime.GenerationSession method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.MambaLMHeadModelGenerationSession.setup">(tensorrt_llm.runtime.MambaLMHeadModelGenerationSession method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.shape">shape (tensorrt_llm.functional.Tensor property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo.shape">(tensorrt_llm.runtime.TensorInfo attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.shape">shape() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sigmoid">sigmoid() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.silu">silu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sin">sin() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.sink_token_length">sink_token_length (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.size">size() (tensorrt_llm.functional.Tensor method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.skip_cross_qkv">skip_cross_qkv (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.slice">slice() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softmax">softmax() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.softplus">softplus() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.split">split() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.split">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sqrt">sqrt() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.sqrt">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.squared_relu">squared_relu() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.stack">stack() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.KVCacheManager.step">step() (tensorrt_llm.runtime.KVCacheManager method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.stop_words_list">stop_words_list (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteria">StoppingCriteria (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.StoppingCriteriaList">StoppingCriteriaList (class in tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sub">sub() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.sum">sum() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.swiglu">swiglu() (in module tensorrt_llm.functional)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="T">T</h2>
<table style="width: 100%" class="indextable genindextable" role="presentation"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.tanh">tanh() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.temperature">temperature (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor">Tensor (class in tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.TensorInfo">TensorInfo (class in tensorrt_llm.runtime)</a>
</li>
      <li>
    tensorrt_llm

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm">module</a>, <a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm">[1]</a>, <a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm">[2]</a>, <a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm">[3]</a>, <a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm">[4]</a>, <a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm">[5]</a>
</li>
      </ul></li>
      <li><a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">tensorrt_llm (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv412tensorrt_llm">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[25]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[28]</a>, <a href="_cpp_gen/runtime.html#_CPPv412tensorrt_llm">[29]</a>
</li>
      <li>
    tensorrt_llm.functional

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#module-tensorrt_llm.functional">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.activation

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.activation">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.attention

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.attention">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.cast

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.cast">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.conv

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.conv">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.embedding

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.embedding">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.linear

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.linear">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.mlp

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.mlp">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.normalization

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.normalization">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.layers.pooling

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#module-tensorrt_llm.layers.pooling">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.models

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#module-tensorrt_llm.models">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.plugin

      <ul>
        <li><a href="python-api/tensorrt_llm.plugin.html#module-tensorrt_llm.plugin">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.quantization

      <ul>
        <li><a href="python-api/tensorrt_llm.quantization.html#module-tensorrt_llm.quantization">module</a>
</li>
      </ul></li>
      <li>
    tensorrt_llm.runtime

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#module-tensorrt_llm.runtime">module</a>
</li>
      </ul></li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_managerE">tensorrt_llm::batch_manager (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE">tensorrt_llm::batch_manager::kv_cache_manager (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">tensorrt_llm::executor (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executorE">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingTypeE">tensorrt_llm::executor::BatchingType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE">tensorrt_llm::executor::BatchingType::kINFLIGHT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE">tensorrt_llm::executor::BatchingType::kSTATIC (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10BeamTokensE">tensorrt_llm::executor::BeamTokens (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationModeE">tensorrt_llm::executor::CommunicationMode (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE">tensorrt_llm::executor::CommunicationMode::kLEADER (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationTypeE">tensorrt_llm::executor::CommunicationType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE">tensorrt_llm::executor::CommunicationType::kMPI (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataTypeE">tensorrt_llm::executor::DataType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBF16E">tensorrt_llm::executor::DataType::kBF16 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE">tensorrt_llm::executor::DataType::kBOOL (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP16E">tensorrt_llm::executor::DataType::kFP16 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kFP32E">tensorrt_llm::executor::DataType::kFP32 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType4kFP8E">tensorrt_llm::executor::DataType::kFP8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT32E">tensorrt_llm::executor::DataType::kINT32 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kINT64E">tensorrt_llm::executor::DataType::kINT64 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType5kINT8E">tensorrt_llm::executor::DataType::kINT8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E">tensorrt_llm::executor::DataType::kUINT8 (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE">tensorrt_llm::executor::DataType::kUNKNOWN (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detailE">tensorrt_llm::executor::detail (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::detail::ofITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor">tensorrt_llm::executor::detail::toITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorE">tensorrt_llm::executor::Executor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE">tensorrt_llm::executor::Executor::awaitResponses (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType">tensorrt_llm::executor::Executor::cancelRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv">tensorrt_llm::executor::Executor::canEnqueueRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request">tensorrt_llm::executor::Executor::enqueueRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE">tensorrt_llm::executor::Executor::enqueueRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig">tensorrt_llm::executor::Executor::Executor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt6vectorI7uint8_tEERKNSt6stringE9ModelTypeRK14ExecutorConfig">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv">tensorrt_llm::executor::Executor::getLatestIterationStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv">tensorrt_llm::executor::Executor::getLatestRequestStats (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE">tensorrt_llm::executor::Executor::getNumResponsesReady (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor5mImplE">tensorrt_llm::executor::Executor::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv">tensorrt_llm::executor::Executor::shutdown (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev">tensorrt_llm::executor::Executor::~Executor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfigE">tensorrt_llm::executor::ExecutorConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE8SizeTypeRK15SchedulerConfigRK13KvCacheConfigbb8SizeType8SizeType12BatchingTypeNSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI13MedusaChoicesEE">tensorrt_llm::executor::ExecutorConfig::ExecutorConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv">tensorrt_llm::executor::ExecutorConfig::getBatchingType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv">tensorrt_llm::executor::ExecutorConfig::getEnableChunkedContext (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getIterStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getKvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getLogitsPostProcessorMapEv">tensorrt_llm::executor::ExecutorConfig::getLogitsPostProcessorMap (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv">tensorrt_llm::executor::ExecutorConfig::getMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getMedusaChoicesEv">tensorrt_llm::executor::ExecutorConfig::getMedusaChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv">tensorrt_llm::executor::ExecutorConfig::getNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv">tensorrt_llm::executor::ExecutorConfig::getParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv">tensorrt_llm::executor::ExecutorConfig::getPeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv">tensorrt_llm::executor::ExecutorConfig::getRequestStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv">tensorrt_llm::executor::ExecutorConfig::getSchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE">tensorrt_llm::executor::ExecutorConfig::mBatchingType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE">tensorrt_llm::executor::ExecutorConfig::mEnableChunkedContext (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mIterStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mKvCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mLogitsPostProcessorMapE">tensorrt_llm::executor::ExecutorConfig::mLogitsPostProcessorMap (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE">tensorrt_llm::executor::ExecutorConfig::mMaxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mMedusaChoicesE">tensorrt_llm::executor::ExecutorConfig::mMedusaChoices (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE">tensorrt_llm::executor::ExecutorConfig::mNormalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE">tensorrt_llm::executor::ExecutorConfig::mParallelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE">tensorrt_llm::executor::ExecutorConfig::mPeftCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE">tensorrt_llm::executor::ExecutorConfig::mRequestStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE">tensorrt_llm::executor::ExecutorConfig::mSchedulerConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType">tensorrt_llm::executor::ExecutorConfig::setBatchingType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb">tensorrt_llm::executor::ExecutorConfig::setEnableChunkedContext (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE8SizeType">tensorrt_llm::executor::ExecutorConfig::setIterStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig">tensorrt_llm::executor::ExecutorConfig::setKvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setLogitsPostProcessorMapERK22LogitsPostProcessorMap">tensorrt_llm::executor::ExecutorConfig::setLogitsPostProcessorMap (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE8SizeType">tensorrt_llm::executor::ExecutorConfig::setMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setMedusaChoicesERK13MedusaChoices">tensorrt_llm::executor::ExecutorConfig::setMedusaChoices (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb">tensorrt_llm::executor::ExecutorConfig::setNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig">tensorrt_llm::executor::ExecutorConfig::setParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig">tensorrt_llm::executor::ExecutorConfig::setPeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE8SizeType">tensorrt_llm::executor::ExecutorConfig::setRequestStatsMaxIterations (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig">tensorrt_llm::executor::ExecutorConfig::setSchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9FloatTypeE">tensorrt_llm::executor::FloatType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6IdTypeE">tensorrt_llm::executor::IdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE">tensorrt_llm::executor::InflightBatchingStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE">tensorrt_llm::executor::InflightBatchingStats::microBatchId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE">tensorrt_llm::executor::InflightBatchingStats::numContextRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE">tensorrt_llm::executor::InflightBatchingStats::numCtxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE">tensorrt_llm::executor::InflightBatchingStats::numGenRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE">tensorrt_llm::executor::InflightBatchingStats::numPausedRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::InflightBatchingStats::numScheduledRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStatsE">tensorrt_llm::executor::IterationStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE">tensorrt_llm::executor::IterationStats::cpuMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE">tensorrt_llm::executor::IterationStats::gpuMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE">tensorrt_llm::executor::IterationStats::inflightBatchingStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats4iterE">tensorrt_llm::executor::IterationStats::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE">tensorrt_llm::executor::IterationStats::kvCacheStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE">tensorrt_llm::executor::IterationStats::maxNumActiveRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE">tensorrt_llm::executor::IterationStats::numActiveRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE">tensorrt_llm::executor::IterationStats::pinnedMemUsage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE">tensorrt_llm::executor::IterationStats::staticBatchingStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE">tensorrt_llm::executor::IterationStats::timestamp (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13IterationTypeE">tensorrt_llm::executor::IterationType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerializationE">tensorrt_llm::executor::JsonSerialization (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats">tensorrt_llm::executor::JsonSerialization::toJsonStr (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor30kDefaultIterStatsMaxIterationsE">tensorrt_llm::executor::kDefaultIterStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor33kDefaultRequestStatsMaxIterationsE">tensorrt_llm::executor::kDefaultRequestStatsMaxIterations (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfigE">tensorrt_llm::executor::KvCacheConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv">tensorrt_llm::executor::KvCacheConfig::getEnableBlockReuse (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv">tensorrt_llm::executor::KvCacheConfig::getFreeGpuMemoryFraction (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::KvCacheConfig::getHostCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getMaxAttentionWindowEv">tensorrt_llm::executor::KvCacheConfig::getMaxAttentionWindow (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv">tensorrt_llm::executor::KvCacheConfig::getMaxTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv">tensorrt_llm::executor::KvCacheConfig::getOnboardBlocks (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv">tensorrt_llm::executor::KvCacheConfig::getSinkTokenLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEb">tensorrt_llm::executor::KvCacheConfig::KvCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE">tensorrt_llm::executor::KvCacheConfig::mEnableBlockReuse (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE">tensorrt_llm::executor::KvCacheConfig::mFreeGpuMemoryFraction (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::KvCacheConfig::mHostCacheSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mMaxAttentionWindowE">tensorrt_llm::executor::KvCacheConfig::mMaxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE">tensorrt_llm::executor::KvCacheConfig::mMaxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE">tensorrt_llm::executor::KvCacheConfig::mOnboardBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE">tensorrt_llm::executor::KvCacheConfig::mSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStatsE">tensorrt_llm::executor::KvCacheStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE">tensorrt_llm::executor::KvCacheStats::freeNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE">tensorrt_llm::executor::KvCacheStats::maxNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE">tensorrt_llm::executor::KvCacheStats::tokensPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE">tensorrt_llm::executor::KvCacheStats::usedNumBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE">tensorrt_llm::executor::LogitsPostProcessor (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE">tensorrt_llm::executor::LogitsPostProcessorMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfigE">tensorrt_llm::executor::LoraConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv">tensorrt_llm::executor::LoraConfig::getConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv">tensorrt_llm::executor::LoraConfig::getTaskId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv">tensorrt_llm::executor::LoraConfig::getWeights (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE">tensorrt_llm::executor::LoraConfig::LoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE">tensorrt_llm::executor::LoraConfig::mConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE">tensorrt_llm::executor::LoraConfig::mTaskId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE">tensorrt_llm::executor::LoraConfig::mWeights (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor13MedusaChoicesE">tensorrt_llm::executor::MedusaChoices (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryTypeE">tensorrt_llm::executor::MemoryType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE">tensorrt_llm::executor::MemoryType::kCPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE">tensorrt_llm::executor::MemoryType::kCPU_PINNED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE">tensorrt_llm::executor::MemoryType::kGPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE">tensorrt_llm::executor::MemoryType::kUNKNOWN (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME">tensorrt_llm::executor::MemoryType::kUVM (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelTypeE">tensorrt_llm::executor::ModelType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE">tensorrt_llm::executor::ModelType::kDECODER_ONLY (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfigE">tensorrt_llm::executor::OutputConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE">tensorrt_llm::executor::OutputConfig::excludeInputFromOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbb">tensorrt_llm::executor::OutputConfig::OutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE">tensorrt_llm::executor::OutputConfig::returnContextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE">tensorrt_llm::executor::OutputConfig::returnGenerationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE">tensorrt_llm::executor::OutputConfig::returnLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfigE">tensorrt_llm::executor::ParallelConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv">tensorrt_llm::executor::ParallelConfig::getCommunicationType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv">tensorrt_llm::executor::ParallelConfig::getDeviceIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv">tensorrt_llm::executor::ParallelConfig::getParticipantIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE">tensorrt_llm::executor::ParallelConfig::mCommMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE">tensorrt_llm::executor::ParallelConfig::mCommType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE">tensorrt_llm::executor::ParallelConfig::mDeviceIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE">tensorrt_llm::executor::ParallelConfig::mParticipantIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI8SizeTypeEEEENSt8optionalINSt6vectorI8SizeTypeEEEE">tensorrt_llm::executor::ParallelConfig::ParallelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode">tensorrt_llm::executor::ParallelConfig::setCommunicationMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType">tensorrt_llm::executor::ParallelConfig::setCommunicationType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI8SizeTypeEE">tensorrt_llm::executor::ParallelConfig::setDeviceIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI8SizeTypeEE">tensorrt_llm::executor::ParallelConfig::setParticipantIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE">tensorrt_llm::executor::PeftCacheConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv">tensorrt_llm::executor::PeftCacheConfig::getDeviceCachePercent (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv">tensorrt_llm::executor::PeftCacheConfig::getHostCacheSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getMaxAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv">tensorrt_llm::executor::PeftCacheConfig::getMaxPagesPerBlockHost (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv">tensorrt_llm::executor::PeftCacheConfig::getNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumDeviceModuleLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumEnsureWorkers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv">tensorrt_llm::executor::PeftCacheConfig::getNumHostModuleLayer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv">tensorrt_llm::executor::PeftCacheConfig::getNumPutWorkers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv">tensorrt_llm::executor::PeftCacheConfig::getOptimalAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE">tensorrt_llm::executor::PeftCacheConfig::mDeviceCachePercent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE">tensorrt_llm::executor::PeftCacheConfig::mHostCacheSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mMaxAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE">tensorrt_llm::executor::PeftCacheConfig::mMaxPagesPerBlockHost (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE">tensorrt_llm::executor::PeftCacheConfig::mNumCopyStreams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumDeviceModuleLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumEnsureWorkers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE">tensorrt_llm::executor::PeftCacheConfig::mNumHostModuleLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE">tensorrt_llm::executor::PeftCacheConfig::mNumPutWorkers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE">tensorrt_llm::executor::PeftCacheConfig::mOptimalAdapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalIfEERKNSt8optionalI6size_tEE">tensorrt_llm::executor::PeftCacheConfig::PeftCacheConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE">tensorrt_llm::executor::PhonyNameDueToError::value (C++ member)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19PhonyNameDueToError5valueE">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE">tensorrt_llm::executor::PromptTuningConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv">tensorrt_llm::executor::PromptTuningConfig::getEmbeddingTable (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE">tensorrt_llm::executor::PromptTuningConfig::mEmbeddingTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6Tensor">tensorrt_llm::executor::PromptTuningConfig::PromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE">tensorrt_llm::executor::RandomSeedType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestE">tensorrt_llm::executor::Request (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv">tensorrt_llm::executor::Request::getBadWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv">tensorrt_llm::executor::Request::getEmbeddingBias (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv">tensorrt_llm::executor::Request::getEndId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv">tensorrt_llm::executor::Request::getInputTokenIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv">tensorrt_llm::executor::Request::getLogitsPostProcessorName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv">tensorrt_llm::executor::Request::getLoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request15getMaxNewTokensEv">tensorrt_llm::executor::Request::getMaxNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv">tensorrt_llm::executor::Request::getOutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv">tensorrt_llm::executor::Request::getPadId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv">tensorrt_llm::executor::Request::getPromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv">tensorrt_llm::executor::Request::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request28getSpeculativeDecodingConfigEv">tensorrt_llm::executor::Request::getSpeculativeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv">tensorrt_llm::executor::Request::getStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv">tensorrt_llm::executor::Request::getStreaming (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request5mImplE">tensorrt_llm::executor::Request::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request">tensorrt_llm::executor::Request::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens8SizeTypebRK14SamplingConfigRK12OutputConfigRKNSt8optionalI8SizeTypeEERKNSt8optionalI8SizeTypeEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI10LoraConfigEENSt8optionalINSt6stringEEE">tensorrt_llm::executor::Request::Request (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setBadWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor">tensorrt_llm::executor::Request::setEmbeddingBias (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setEndIdE8SizeType">tensorrt_llm::executor::Request::setEndId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE">tensorrt_llm::executor::Request::setLogitsPostProcessorName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig">tensorrt_llm::executor::Request::setLoraConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig">tensorrt_llm::executor::Request::setOutputConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request8setPadIdE8SizeType">tensorrt_llm::executor::Request::setPadId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig">tensorrt_llm::executor::Request::setPromptTuningConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig">tensorrt_llm::executor::Request::setSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request28setSpeculativeDecodingConfigERK25SpeculativeDecodingConfig">tensorrt_llm::executor::Request::setSpeculativeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE">tensorrt_llm::executor::Request::setStopWords (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb">tensorrt_llm::executor::Request::setStreaming (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor7RequestD0Ev">tensorrt_llm::executor::Request::~Request (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStageE">tensorrt_llm::executor::RequestStage (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kCONTEXT_IN_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE">tensorrt_llm::executor::RequestStage::kGENERATION_COMPLETE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE">tensorrt_llm::executor::RequestStage::kGENERATION_IN_PROGRESS (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE">tensorrt_llm::executor::RequestStage::kQUEUED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStatsE">tensorrt_llm::executor::RequestStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE">tensorrt_llm::executor::RequestStats::contextPrefillPosition (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats2idE">tensorrt_llm::executor::RequestStats::id (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE">tensorrt_llm::executor::RequestStats::numGeneratedTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE">tensorrt_llm::executor::RequestStats::paused (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE">tensorrt_llm::executor::RequestStats::scheduled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor12RequestStats5stageE">tensorrt_llm::executor::RequestStats::stage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE">tensorrt_llm::executor::RequestStatsPerIteration (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE">tensorrt_llm::executor::RequestStatsPerIteration::iter (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE">tensorrt_llm::executor::RequestStatsPerIteration::requestStats (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseE">tensorrt_llm::executor::Response (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv">tensorrt_llm::executor::Response::getErrorMsg (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv">tensorrt_llm::executor::Response::getRequestId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response9getResultEv">tensorrt_llm::executor::Response::getResult (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv">tensorrt_llm::executor::Response::hasError (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response5mImplE">tensorrt_llm::executor::Response::mImpl (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response">tensorrt_llm::executor::Response::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6Result">tensorrt_llm::executor::Response::Response (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8ResponseD0Ev">tensorrt_llm::executor::Response::~Response (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6ResultE">tensorrt_llm::executor::Result (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE">tensorrt_llm::executor::Result::contextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE">tensorrt_llm::executor::Result::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE">tensorrt_llm::executor::Result::generationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result7isFinalE">tensorrt_llm::executor::Result::isFinal (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result8logProbsE">tensorrt_llm::executor::Result::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE">tensorrt_llm::executor::Result::outputTokenIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfigE">tensorrt_llm::executor::SamplingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv">tensorrt_llm::executor::SamplingConfig::getBeamSearchDiversityRate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv">tensorrt_llm::executor::SamplingConfig::getBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv">tensorrt_llm::executor::SamplingConfig::getEarlyStopping (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv">tensorrt_llm::executor::SamplingConfig::getFrequencyPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv">tensorrt_llm::executor::SamplingConfig::getLengthPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinLengthEv">tensorrt_llm::executor::SamplingConfig::getMinLength (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv">tensorrt_llm::executor::SamplingConfig::getPresencePenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig13getRandomSeedEv">tensorrt_llm::executor::SamplingConfig::getRandomSeed (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv">tensorrt_llm::executor::SamplingConfig::getRepetitionPenalty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv">tensorrt_llm::executor::SamplingConfig::getTemperature (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv">tensorrt_llm::executor::SamplingConfig::getTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv">tensorrt_llm::executor::SamplingConfig::getTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv">tensorrt_llm::executor::SamplingConfig::getTopPDecay (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv">tensorrt_llm::executor::SamplingConfig::getTopPMin (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv">tensorrt_llm::executor::SamplingConfig::getTopPResetIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE">tensorrt_llm::executor::SamplingConfig::mBeamSearchDiversityRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE">tensorrt_llm::executor::SamplingConfig::mBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE">tensorrt_llm::executor::SamplingConfig::mEarlyStopping (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE">tensorrt_llm::executor::SamplingConfig::mFrequencyPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE">tensorrt_llm::executor::SamplingConfig::mLengthPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinLengthE">tensorrt_llm::executor::SamplingConfig::mMinLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE">tensorrt_llm::executor::SamplingConfig::mPresencePenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig11mRandomSeedE">tensorrt_llm::executor::SamplingConfig::mRandomSeed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE">tensorrt_llm::executor::SamplingConfig::mRepetitionPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE">tensorrt_llm::executor::SamplingConfig::mTemperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE">tensorrt_llm::executor::SamplingConfig::mTopK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE">tensorrt_llm::executor::SamplingConfig::mTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE">tensorrt_llm::executor::SamplingConfig::mTopPDecay (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE">tensorrt_llm::executor::SamplingConfig::mTopPMin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE">tensorrt_llm::executor::SamplingConfig::mTopPResetIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::executor::SamplingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE8SizeTypeRKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI8SizeTypeEE">tensorrt_llm::executor::SamplingConfig::SamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfigE">tensorrt_llm::executor::SchedulerConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor15SchedulerConfig9getPolicyEv">tensorrt_llm::executor::SchedulerConfig::getPolicy (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig7mPolicyE">tensorrt_llm::executor::SchedulerConfig::mPolicy (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE15SchedulerPolicy">tensorrt_llm::executor::SchedulerConfig::SchedulerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerPolicyE">tensorrt_llm::executor::SchedulerPolicy (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerPolicy20kGUARANTEED_NO_EVICTE">tensorrt_llm::executor::SchedulerPolicy::kGUARANTEED_NO_EVICT (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor15SchedulerPolicy16kMAX_UTILIZATIONE">tensorrt_llm::executor::SchedulerPolicy::kMAX_UTILIZATION (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5ShapeE">tensorrt_llm::executor::Shape (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape4BaseE">tensorrt_llm::executor::Shape::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape7DimTypeE">tensorrt_llm::executor::Shape::DimType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI7DimTypeEE">tensorrt_llm::executor::Shape::Shape (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK7DimTypeN4Base9size_typeE">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor8SizeTypeE">tensorrt_llm::executor::SizeType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE">tensorrt_llm::executor::SpeculativeDecodingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig22getAcceptanceThresholdEv">tensorrt_llm::executor::SpeculativeDecodingConfig::getAcceptanceThreshold (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getLogitsEv">tensorrt_llm::executor::SpeculativeDecodingConfig::getLogits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfig9getTokensEv">tensorrt_llm::executor::SpeculativeDecodingConfig::getTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig20mAcceptanceThresholdE">tensorrt_llm::executor::SpeculativeDecodingConfig::mAcceptanceThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mLogitsE">tensorrt_llm::executor::SpeculativeDecodingConfig::mLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig7mTokensE">tensorrt_llm::executor::SpeculativeDecodingConfig::mTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEE">tensorrt_llm::executor::SpeculativeDecodingConfig::SpeculativeDecodingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE">tensorrt_llm::executor::StaticBatchingStats (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE">tensorrt_llm::executor::StaticBatchingStats::emptyGenSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE">tensorrt_llm::executor::StaticBatchingStats::numContextRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE">tensorrt_llm::executor::StaticBatchingStats::numCtxTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE">tensorrt_llm::executor::StaticBatchingStats::numGenTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE">tensorrt_llm::executor::StaticBatchingStats::numScheduledRequests (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9StreamPtrE">tensorrt_llm::executor::StreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorE">tensorrt_llm::executor::Tensor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr">tensorrt_llm::executor::Tensor::copyTo (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToGpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToManaged (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::copyToPooledPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape">tensorrt_llm::executor::Tensor::cpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE">tensorrt_llm::executor::Tensor::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::detail::ofITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor">tensorrt_llm::executor::Tensor::detail::toITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv">tensorrt_llm::executor::Tensor::getData (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv">tensorrt_llm::executor::Tensor::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv">tensorrt_llm::executor::Tensor::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev">tensorrt_llm::executor::Tensor::getRuntimeType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv">tensorrt_llm::executor::Tensor::getShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv">tensorrt_llm::executor::Tensor::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv">tensorrt_llm::executor::Tensor::getSizeInBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape">tensorrt_llm::executor::Tensor::gpu (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor4ImplE">tensorrt_llm::executor::Tensor::Impl (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape">tensorrt_llm::executor::Tensor::managed (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE">tensorrt_llm::executor::Tensor::mTensor (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape">tensorrt_llm::executor::Tensor::of (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape">[2]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorcvbEv">tensorrt_llm::executor::Tensor::operator bool (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor">tensorrt_llm::executor::Tensor::operator!= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor">tensorrt_llm::executor::Tensor::operator= (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor">tensorrt_llm::executor::Tensor::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape">tensorrt_llm::executor::Tensor::pooledPinned (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape">[1]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr">tensorrt_llm::executor::Tensor::setFrom (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr">tensorrt_llm::executor::Tensor::setZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE">tensorrt_llm::executor::Tensor::Tensor (C++ function)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor">[1]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor">[2]</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv">[3]</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor6TensorD0Ev">tensorrt_llm::executor::Tensor::~Tensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9TensorPtrE">tensorrt_llm::executor::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11TokenIdTypeE">tensorrt_llm::executor::TokenIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE">tensorrt_llm::executor::TypeTraits (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE">tensorrt_llm::executor::TypeTraits&lt;bool&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE">tensorrt_llm::executor::TypeTraits&lt;bool&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE">tensorrt_llm::executor::TypeTraits&lt;float&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE">tensorrt_llm::executor::TypeTraits&lt;float&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE">tensorrt_llm::executor::TypeTraits&lt;half&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE">tensorrt_llm::executor::TypeTraits&lt;half&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::int8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE">tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE">tensorrt_llm::executor::TypeTraits&lt;std::uint8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE">tensorrt_llm::executor::TypeTraits&lt;T*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE">tensorrt_llm::executor::TypeTraits&lt;T*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor11VecLogProbsE">tensorrt_llm::executor::VecLogProbs (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm8executor9VecTokensE">tensorrt_llm::executor::VecTokens (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm6layersE">tensorrt_llm::layers (C++ type)</a>
</li>
      <li><a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">tensorrt_llm::runtime (C++ type)</a>, <a href="_cpp_gen/executor.html#_CPPv4N12tensorrt_llm7runtimeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[9]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[10]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[11]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[12]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[13]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[14]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[15]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[16]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[17]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[18]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[19]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[20]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[21]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[22]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[23]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[24]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[25]</a>, <a 
href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[26]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[27]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimeE">[28]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer">tensorrt_llm::runtime::bufferCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE">tensorrt_llm::runtime::BufferDataType (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb">tensorrt_llm::runtime::BufferDataType::BufferDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv">tensorrt_llm::runtime::BufferDataType::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv">tensorrt_llm::runtime::BufferDataType::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv">tensorrt_llm::runtime::BufferDataType::isPointer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv">tensorrt_llm::runtime::BufferDataType::isUnsigned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE">tensorrt_llm::runtime::BufferDataType::kTrtPointerType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE">tensorrt_llm::runtime::BufferDataType::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE">tensorrt_llm::runtime::BufferDataType::mPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE">tensorrt_llm::runtime::BufferDataType::mUnsigned (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv">tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerE">tensorrt_llm::runtime::BufferManager (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb">tensorrt_llm::runtime::BufferManager::BufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer">tensorrt_llm::runtime::BufferManager::copy (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType">tensorrt_llm::runtime::BufferManager::copyFrom (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::cpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE">tensorrt_llm::runtime::BufferManager::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyBuffer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::emptyTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv">tensorrt_llm::runtime::BufferManager::getStream (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpu (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::gpuSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE">tensorrt_llm::runtime::BufferManager::IBufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager14initMemoryPoolEi">tensorrt_llm::runtime::BufferManager::initMemoryPool (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE">tensorrt_llm::runtime::BufferManager::ITensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE">tensorrt_llm::runtime::BufferManager::kBYTE_TYPE (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::managed (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEi">tensorrt_llm::runtime::BufferManager::memoryPoolFree (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEi">tensorrt_llm::runtime::BufferManager::memoryPoolReserved (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE">tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToEiNSt6size_tE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEi">tensorrt_llm::runtime::BufferManager::memoryPoolUsed (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE">tensorrt_llm::runtime::BufferManager::mStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE">tensorrt_llm::runtime::BufferManager::mTrimPool (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinned (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE">tensorrt_llm::runtime::BufferManager::pinnedPool (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t">tensorrt_llm::runtime::BufferManager::setMem (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer">tensorrt_llm::runtime::BufferManager::setZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev">tensorrt_llm::runtime::BufferManager::~BufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE">tensorrt_llm::runtime::BufferRange (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE">tensorrt_llm::runtime::BufferRange::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type">tensorrt_llm::runtime::BufferRange::BufferRange (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE">tensorrt_llm::runtime::constPointerCast (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEventE">tensorrt_llm::runtime::CudaEvent (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb">tensorrt_llm::runtime::CudaEvent::CudaEvent (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE">tensorrt_llm::runtime::CudaEvent::Deleter (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb">tensorrt_llm::runtime::CudaEvent::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE">tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer">tensorrt_llm::runtime::CudaEvent::Deleter::operator() (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE">tensorrt_llm::runtime::CudaEvent::element_type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE">tensorrt_llm::runtime::CudaEvent::EventPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv">tensorrt_llm::runtime::CudaEvent::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE">tensorrt_llm::runtime::CudaEvent::mEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE">tensorrt_llm::runtime::CudaEvent::pointer (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv">tensorrt_llm::runtime::CudaEvent::synchronize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStreamE">tensorrt_llm::runtime::CudaStream (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t">tensorrt_llm::runtime::CudaStream::CudaStream (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE">tensorrt_llm::runtime::CudaStream::Deleter (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb">tensorrt_llm::runtime::CudaStream::Deleter::Deleter (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE">tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t">tensorrt_llm::runtime::CudaStream::Deleter::operator() (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv">tensorrt_llm::runtime::CudaStream::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv">tensorrt_llm::runtime::CudaStream::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE">tensorrt_llm::runtime::CudaStream::mDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE">tensorrt_llm::runtime::CudaStream::mStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::record (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE">tensorrt_llm::runtime::CudaStream::StreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv">tensorrt_llm::runtime::CudaStream::synchronize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE">tensorrt_llm::runtime::CudaStream::wait (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE">tensorrt_llm::runtime::DataTypeTraits (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;kDataType, kUnsigned, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kBOOL, kUnsigned&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kFLOAT&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kHALF&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT32&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64, true&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT64&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kINT8&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::name (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::size (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE">tensorrt_llm::runtime::DataTypeTraits&lt;nvinfer1::DataType::kUINT8, kUnsigned&gt;::type (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoderE">tensorrt_llm::runtime::decoder (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5InputE">tensorrt_llm::runtime::decoder::Input (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input16cacheIndirectionE">tensorrt_llm::runtime::decoder::Input::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input5InputE9TensorPtr">tensorrt_llm::runtime::decoder::Input::Input (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input6logitsE">tensorrt_llm::runtime::decoder::Input::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder5Input9TensorPtrE">tensorrt_llm::runtime::decoder::Input::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6OutputE">tensorrt_llm::runtime::decoder::Output (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output16cacheIndirectionE">tensorrt_llm::runtime::decoder::Output::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output6OutputEv">tensorrt_llm::runtime::decoder::Output::Output (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output15sequenceLengthsE">tensorrt_llm::runtime::decoder::Output::sequenceLengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7decoder6Output9TensorPtrE">tensorrt_llm::runtime::decoder::Output::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batchE">tensorrt_llm::runtime::decoder_batch (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5InputE">tensorrt_llm::runtime::decoder_batch::Input (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6activeE">tensorrt_llm::runtime::decoder_batch::Input::active (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input16cacheIndirectionE">tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEE">tensorrt_llm::runtime::decoder_batch::Input::Input (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI14TensorConstPtrEERKNSt6vectorIbEE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input5InputERKNSt6vectorI9TensorPtrEERKNSt6vectorIbEE">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input6logitsE">tensorrt_llm::runtime::decoder_batch::Input::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input12medusaLogitsE">tensorrt_llm::runtime::decoder_batch::Input::medusaLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input14TensorConstPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Input9TensorPtrE">tensorrt_llm::runtime::decoder_batch::Input::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch6OutputE">tensorrt_llm::runtime::decoder_batch::Output (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7RequestE">tensorrt_llm::runtime::decoder_batch::Request (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12badWordsListE">tensorrt_llm::runtime::decoder_batch::Request::badWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9BufferPtrE">tensorrt_llm::runtime::decoder_batch::Request::BufferPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request18computeCumLogProbsE">tensorrt_llm::runtime::decoder_batch::Request::computeCumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request15computeLogProbsE">tensorrt_llm::runtime::decoder_batch::Request::computeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request14ConstTensorPtrE">tensorrt_llm::runtime::decoder_batch::Request::ConstTensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftLogitsE">tensorrt_llm::runtime::decoder_batch::Request::draftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11draftTokensE">tensorrt_llm::runtime::decoder_batch::Request::draftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13embeddingBiasE">tensorrt_llm::runtime::decoder_batch::Request::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request5endIdE">tensorrt_llm::runtime::decoder_batch::Request::endId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request28generatedTokensPerEngineStepE">tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerEngineStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request3idsE">tensorrt_llm::runtime::decoder_batch::Request::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request8inputLenE">tensorrt_llm::runtime::decoder_batch::Request::inputLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request12maxNewTokensE">tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request11medusaPathsE">tensorrt_llm::runtime::decoder_batch::Request::medusaPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13medusaTreeIdsE">tensorrt_llm::runtime::decoder_batch::Request::medusaTreeIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request7RequestE14ConstTensorPtr8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE">tensorrt_llm::runtime::decoder_batch::Request::Request (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request13stopWordsListE">tensorrt_llm::runtime::decoder_batch::Request::stopWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch7Request9TensorPtrE">tensorrt_llm::runtime::decoder_batch::Request::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5TokenE">tensorrt_llm::runtime::decoder_batch::Token (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token6activeE">tensorrt_llm::runtime::decoder_batch::Token::active (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5eventE">tensorrt_llm::runtime::decoder_batch::Token::event (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13decoder_batch5Token5TokenERR9CudaEventRKNSt6vectorIbEE">tensorrt_llm::runtime::decoder_batch::Token::Token (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInputE">tensorrt_llm::runtime::DecodingInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE">tensorrt_llm::runtime::DecodingInput::badWordsLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsListE">tensorrt_llm::runtime::DecodingInput::badWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE">tensorrt_llm::runtime::DecodingInput::badWordsPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE">tensorrt_llm::runtime::DecodingInput::batchSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE">tensorrt_llm::runtime::DecodingInput::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputE8SizeType8SizeType8SizeType8SizeType9TensorPtr9TensorPtr">tensorrt_llm::runtime::DecodingInput::DecodingInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE">tensorrt_llm::runtime::DecodingInput::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE">tensorrt_llm::runtime::DecodingInput::endIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput8finishedE">tensorrt_llm::runtime::DecodingInput::finished (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE">tensorrt_llm::runtime::DecodingInput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput6logitsE">tensorrt_llm::runtime::DecodingInput::logits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9logitsVecE">tensorrt_llm::runtime::DecodingInput::logitsVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE">tensorrt_llm::runtime::DecodingInput::maxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE">tensorrt_llm::runtime::DecodingInput::maxBadWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12maxBatchSizeE">tensorrt_llm::runtime::DecodingInput::maxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE">tensorrt_llm::runtime::DecodingInput::maxLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE">tensorrt_llm::runtime::DecodingInput::maxStopWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE">tensorrt_llm::runtime::DecodingInput::medusaInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaCurTokensPerStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaPaths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTargetTokensPerStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE">tensorrt_llm::runtime::DecodingInput::MedusaInputs::medusaTreeIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE">tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE">tensorrt_llm::runtime::DecodingInput::sequenceLimitLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE">tensorrt_llm::runtime::DecodingInput::sinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE">tensorrt_llm::runtime::DecodingInput::step (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE">tensorrt_llm::runtime::DecodingInput::stopWordsLens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsListE">tensorrt_llm::runtime::DecodingInput::stopWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE">tensorrt_llm::runtime::DecodingInput::stopWordsPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE">tensorrt_llm::runtime::DecodingInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingModeE">tensorrt_llm::runtime::DecodingMode (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12DecodingMode9allBitSetE14UnderlyingType">tensorrt_llm::runtime::DecodingMode::allBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12DecodingMode9anyBitSetE14UnderlyingType">tensorrt_llm::runtime::DecodingMode::anyBitSet (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode10BeamSearchEv">tensorrt_llm::runtime::DecodingMode::BeamSearch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode12DecodingModeE14UnderlyingType">tensorrt_llm::runtime::DecodingMode::DecodingMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode12isBeamSearchEv">tensorrt_llm::runtime::DecodingMode::isBeamSearch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode8isMedusaEv">tensorrt_llm::runtime::DecodingMode::isMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode6isNoneEv">tensorrt_llm::runtime::DecodingMode::isNone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopKEv">tensorrt_llm::runtime::DecodingMode::isTopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode13isTopKandTopPEv">tensorrt_llm::runtime::DecodingMode::isTopKandTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode12isTopKorTopPEv">tensorrt_llm::runtime::DecodingMode::isTopKorTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode6isTopPEv">tensorrt_llm::runtime::DecodingMode::isTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode11kBeamSearchE">tensorrt_llm::runtime::DecodingMode::kBeamSearch (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode7kMedusaE">tensorrt_llm::runtime::DecodingMode::kMedusa (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode5kNoneE">tensorrt_llm::runtime::DecodingMode::kNone (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopKE">tensorrt_llm::runtime::DecodingMode::kTopK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode9kTopKTopPE">tensorrt_llm::runtime::DecodingMode::kTopKTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode5kTopPE">tensorrt_llm::runtime::DecodingMode::kTopP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode6MedusaEv">tensorrt_llm::runtime::DecodingMode::Medusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode6mStateE">tensorrt_llm::runtime::DecodingMode::mState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode4NoneEv">tensorrt_llm::runtime::DecodingMode::None (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime12DecodingModeeqERK12DecodingMode">tensorrt_llm::runtime::DecodingMode::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopKEv">tensorrt_llm::runtime::DecodingMode::TopK (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode8TopKTopPEv">tensorrt_llm::runtime::DecodingMode::TopKTopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode4TopPEv">tensorrt_llm::runtime::DecodingMode::TopP (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime12DecodingMode14UnderlyingTypeE">tensorrt_llm::runtime::DecodingMode::UnderlyingType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutputE">tensorrt_llm::runtime::DecodingOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE">tensorrt_llm::runtime::DecodingOutput::beamHypotheses (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11cumLogProbsE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyER13BufferManager">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initER13BufferManager11TokenIdType">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses6isDoneE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8logProbsE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15minNormedScoresE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12normedScoresE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses8numBeamsE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsTgtE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE8SizeType8SizeType8SizeType">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsTgtE">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE8SizeType8SizeType">tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE">tensorrt_llm::runtime::DecodingOutput::cacheIndirection (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE">tensorrt_llm::runtime::DecodingOutput::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputE9TensorPtr">tensorrt_llm::runtime::DecodingOutput::DecodingOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput8finishedE">tensorrt_llm::runtime::DecodingOutput::finished (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE">tensorrt_llm::runtime::DecodingOutput::finishedSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE">tensorrt_llm::runtime::DecodingOutput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE">tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE">tensorrt_llm::runtime::DecodingOutput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE">tensorrt_llm::runtime::DecodingOutput::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputsE">tensorrt_llm::runtime::DecodingOutput::MedusaOutputs (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13medusaOutputsE">tensorrt_llm::runtime::DecodingOutput::medusaOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs27medusaAcceptedLengthsCumSumE">tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaAcceptedLengthsCumSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs23medusaAcceptedTokensLenE">tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaAcceptedTokensLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs21medusaNextDraftTokensE">tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaNextDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput13MedusaOutputs18medusaPathsOffsetsE">tensorrt_llm::runtime::DecodingOutput::MedusaOutputs::medusaPathsOffsets (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE">tensorrt_llm::runtime::DecodingOutput::newTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE">tensorrt_llm::runtime::DecodingOutput::newTokensSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE">tensorrt_llm::runtime::DecodingOutput::newTokensVec (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE">tensorrt_llm::runtime::DecodingOutput::parentIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE">tensorrt_llm::runtime::DecodingOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInputE">tensorrt_llm::runtime::GenerationInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput4BaseE">tensorrt_llm::runtime::GenerationInput::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput15GenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb">tensorrt_llm::runtime::GenerationInput::GenerationInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GenerationInput9TensorPtrE">tensorrt_llm::runtime::GenerationInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutputE">tensorrt_llm::runtime::GenerationOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput4BaseE">tensorrt_llm::runtime::GenerationOutput::Base (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput16GenerationOutputE9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenerationOutput::GenerationOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16GenerationOutput9TensorPtrE">tensorrt_llm::runtime::GenerationOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I00EN12tensorrt_llm7runtime22GenericGenerationInputE">tensorrt_llm::runtime::GenericGenerationInput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12badWordsListE">tensorrt_llm::runtime::GenericGenerationInput::badWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13embeddingBiasE">tensorrt_llm::runtime::GenericGenerationInput::embeddingBias (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5endIdE">tensorrt_llm::runtime::GenericGenerationInput::endId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput22GenericGenerationInputEK8SizeTypeK8SizeType9TensorPtr9TensorPtrb">tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput3idsE">tensorrt_llm::runtime::GenericGenerationInput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput7lengthsE">tensorrt_llm::runtime::GenericGenerationInput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput12maxNewTokensE">tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput6packedE">tensorrt_llm::runtime::GenericGenerationInput::packed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput5padIdE">tensorrt_llm::runtime::GenericGenerationInput::padId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput18promptTuningParamsE">tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput13stopWordsListE">tensorrt_llm::runtime::GenericGenerationInput::stopWordsList (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime22GenericGenerationInput9TensorPtrE">tensorrt_llm::runtime::GenericGenerationInput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime23GenericGenerationOutputE">tensorrt_llm::runtime::GenericGenerationOutput (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8CallbackE">tensorrt_llm::runtime::GenericGenerationOutput::Callback (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput13contextLogitsE">tensorrt_llm::runtime::GenericGenerationOutput::contextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput11cumLogProbsE">tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16generationLogitsE">tensorrt_llm::runtime::GenericGenerationOutput::generationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput23GenericGenerationOutputE9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput3idsE">tensorrt_llm::runtime::GenericGenerationOutput::ids (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput7lengthsE">tensorrt_llm::runtime::GenericGenerationOutput::lengths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput8logProbsE">tensorrt_llm::runtime::GenericGenerationOutput::logProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput16onTokenGeneratedE">tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime23GenericGenerationOutput9TensorPtrE">tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE">tensorrt_llm::runtime::GenericPromptTuningParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE">tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE">tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams8SizeTypeE">tensorrt_llm::runtime::GenericPromptTuningParams::SizeType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE">tensorrt_llm::runtime::GenericPromptTuningParams::tasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE">tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE">tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE">tensorrt_llm::runtime::GptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoder::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder7forwardER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::GptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager">tensorrt_llm::runtime::GptDecoder::gatherTree (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::GptDecoder::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERK12DecodingMode6size_t6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE">tensorrt_llm::runtime::GptDecoder::GptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE">tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder14mLogProbsTiledE">tensorrt_llm::runtime::GptDecoder::mLogProbsTiled (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE">tensorrt_llm::runtime::GptDecoder::mManager (C++ member)</a>
</li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder13mMaxBatchSizeE">tensorrt_llm::runtime::GptDecoder::mMaxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder5mPropE">tensorrt_llm::runtime::GptDecoder::mProp (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE">tensorrt_llm::runtime::GptDecoder::mSamplingConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE">tensorrt_llm::runtime::GptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE">tensorrt_llm::runtime::GptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatchE">tensorrt_llm::runtime::GptDecoderBatch (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch21allocateMedusaBuffersEv">tensorrt_llm::runtime::GptDecoderBatch::allocateMedusaBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13CudaStreamPtrE">tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16DecodingInputPtrE">tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17DecodingOutputPtrE">tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::finalize (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch8finalizeEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::GptDecoderBatch::forwardAsync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12forwardAsyncERN7decoder6OutputERKN7decoder5InputE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24forwardAsyncFusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent">tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncFusedDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch26forwardAsyncUnfusedDecoderE8SizeTypeRN13decoder_batch6OutputERKN13decoder_batch5InputERK9CudaEvent">tensorrt_llm::runtime::GptDecoderBatch::forwardAsyncUnfusedDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE">tensorrt_llm::runtime::GptDecoderBatch::forwardSync (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11forwardSyncEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch15getAllNewTokensEv">tensorrt_llm::runtime::GptDecoderBatch::getAllNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch14getCumLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getFinishedEv">tensorrt_llm::runtime::GptDecoderBatch::getFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::getLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch11getLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch30getMedusaAcceptedLengthsCumSumEv">tensorrt_llm::runtime::GptDecoderBatch::getMedusaAcceptedLengthsCumSum (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch28getMedusaAcceptedPackedPathsEv">tensorrt_llm::runtime::GptDecoderBatch::getMedusaAcceptedPackedPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch13getNbFinishedEv">tensorrt_llm::runtime::GptDecoderBatch::getNbFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch10getNbStepsEv">tensorrt_llm::runtime::GptDecoderBatch::getNbSteps (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getNewTokensE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::getNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18getNextDraftTokensEv">tensorrt_llm::runtime::GptDecoderBatch::getNextDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::getOutputIds (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getOutputIdsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch12getParentIdsEv">tensorrt_llm::runtime::GptDecoderBatch::getParentIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15GptDecoderBatchENSt6size_tENSt6size_tE13CudaStreamPtr">tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13GptDecoderPtrE">tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mAcceptByLogitsE">tensorrt_llm::runtime::GptDecoderBatch::mAcceptByLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mActualBatchSizeE">tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptLogitsE">tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsAcceptLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mBatchSlotsAcceptTokensE">tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsAcceptTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mBatchSlotsDecoderE">tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsDecoder (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mBatchSlotsSetupE">tensorrt_llm::runtime::GptDecoderBatch::mBatchSlotsSetup (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mBeamWidthsE">tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mBufferManagerE">tensorrt_llm::runtime::GptDecoderBatch::mBufferManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mCurandStatesE">tensorrt_llm::runtime::GptDecoderBatch::mCurandStates (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mDecodersE">tensorrt_llm::runtime::GptDecoderBatch::mDecoders (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mDecodingInputsE">tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mDecodingOutputsE">tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mDraftLogitsE">tensorrt_llm::runtime::GptDecoderBatch::mDraftLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11mDraftProbsE">tensorrt_llm::runtime::GptDecoderBatch::mDraftProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mDraftTokenIdsE">tensorrt_llm::runtime::GptDecoderBatch::mDraftTokenIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9mFinishedE">tensorrt_llm::runtime::GptDecoderBatch::mFinished (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14mFinishedStepsE">tensorrt_llm::runtime::GptDecoderBatch::mFinishedSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mFinishedSumE">tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardEventE">tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mForwardTokenE">tensorrt_llm::runtime::GptDecoderBatch::mForwardToken (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mFusedDecoderE">tensorrt_llm::runtime::GptDecoderBatch::mFusedDecoder (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29mGeneratedTokensPerEngineStepE">tensorrt_llm::runtime::GptDecoderBatch::mGeneratedTokensPerEngineStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mJointDecodingInputE">tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch20mJointDecodingOutputE">tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch19mMaxAttentionWindowE">tensorrt_llm::runtime::GptDecoderBatch::mMaxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mMaxBadWordsLenE">tensorrt_llm::runtime::GptDecoderBatch::mMaxBadWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch13mMaxNewTokensE">tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch18mMaxSequenceLengthE">tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mMaxStopWordsLenE">tensorrt_llm::runtime::GptDecoderBatch::mMaxStopWordsLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch24mMaxTokensPerDecoderStepE">tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerDecoderStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch23mMaxTokensPerEngineStepE">tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerEngineStep (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mNbStepsE">tensorrt_llm::runtime::GptDecoderBatch::mNbSteps (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch15mNumDraftTokensE">tensorrt_llm::runtime::GptDecoderBatch::mNumDraftTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mSinkTokenLengthE">tensorrt_llm::runtime::GptDecoderBatch::mSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch7mStreamE">tensorrt_llm::runtime::GptDecoderBatch::mStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8mStreamsE">tensorrt_llm::runtime::GptDecoderBatch::mStreams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch17mTargetLogitsPtrsE">tensorrt_llm::runtime::GptDecoderBatch::mTargetLogitsPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch12mTargetProbsE">tensorrt_llm::runtime::GptDecoderBatch::mTargetProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mUseMedusaE">tensorrt_llm::runtime::GptDecoderBatch::mUseMedusa (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10mVocabSizeE">tensorrt_llm::runtime::GptDecoderBatch::mVocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16mVocabSizePaddedE">tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig">tensorrt_llm::runtime::GptDecoderBatch::newBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch10newRequestE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig">tensorrt_llm::runtime::GptDecoderBatch::newRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch16newRequestMedusaE8SizeTypeRKN13decoder_batch7RequestE">tensorrt_llm::runtime::GptDecoderBatch::newRequestMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE">tensorrt_llm::runtime::GptDecoderBatch::newRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch29newRequestSpeculativeDecodingE8SizeTypeRKN13decoder_batch7RequestERK14SamplingConfig">tensorrt_llm::runtime::GptDecoderBatch::newRequestSpeculativeDecoding (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime15GptDecoderBatch18postProcessRequestE8SizeType">tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig">tensorrt_llm::runtime::GptDecoderBatch::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch11setupMedusaERK14GptModelConfig">tensorrt_llm::runtime::GptDecoderBatch::setupMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch14SharedConstPtrE">tensorrt_llm::runtime::GptDecoderBatch::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime15GptDecoderBatch9TensorPtrE">tensorrt_llm::runtime::GptDecoderBatch::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE">tensorrt_llm::runtime::GptJsonConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig">tensorrt_llm::runtime::GptJsonConfig::engineFilename (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv">tensorrt_llm::runtime::GptJsonConfig::getModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv">tensorrt_llm::runtime::GptJsonConfig::getName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv">tensorrt_llm::runtime::GptJsonConfig::getPrecision (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv">tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv">tensorrt_llm::runtime::GptJsonConfig::getVersion (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv">tensorrt_llm::runtime::GptJsonConfig::getWorldSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE8SizeType8SizeTypeRK14GptModelConfig">tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig15mGptModelConfigE">tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE">tensorrt_llm::runtime::GptJsonConfig::mName (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE">tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE">tensorrt_llm::runtime::GptJsonConfig::mPrecision (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE">tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE">tensorrt_llm::runtime::GptJsonConfig::mVersion (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE">tensorrt_llm::runtime::GptJsonConfig::parse (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfigE">tensorrt_llm::runtime::GptModelConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEb">tensorrt_llm::runtime::GptModelConfig::computeContextLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20computeContextLogitsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEb">tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig23computeGenerationLogitsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig27getContextFMHAForGenerationEv">tensorrt_llm::runtime::GptModelConfig::getContextFMHAForGeneration (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getDataTypeEv">tensorrt_llm::runtime::GptModelConfig::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getHiddenSizeEv">tensorrt_llm::runtime::GptModelConfig::getHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13getKvDataTypeEv">tensorrt_llm::runtime::GptModelConfig::getKvDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getLoraModulesEv">tensorrt_llm::runtime::GptModelConfig::getLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMambaConfigEv">tensorrt_llm::runtime::GptModelConfig::getMambaConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBatchSizeEv">tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxBeamWidthEv">tensorrt_llm::runtime::GptModelConfig::getMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxDraftLenEv">tensorrt_llm::runtime::GptModelConfig::getMaxDraftLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxInputLenEv">tensorrt_llm::runtime::GptModelConfig::getMaxInputLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getMaxLoraRankEv">tensorrt_llm::runtime::GptModelConfig::getMaxLoraRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMaxNumTokensEv">tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig30getMaxPromptEmbeddingTableSizeEv">tensorrt_llm::runtime::GptModelConfig::getMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getMaxSequenceLenEv">tensorrt_llm::runtime::GptModelConfig::getMaxSequenceLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getMaxTokensPerStepEv">tensorrt_llm::runtime::GptModelConfig::getMaxTokensPerStep (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getMedusaModuleEv">tensorrt_llm::runtime::GptModelConfig::getMedusaModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig16getMlpHiddenSizeEv">tensorrt_llm::runtime::GptModelConfig::getMlpHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15getModelVariantEv">tensorrt_llm::runtime::GptModelConfig::getModelVariant (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10getNbHeadsEv">tensorrt_llm::runtime::GptModelConfig::getNbHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getNbKvHeadsEv">tensorrt_llm::runtime::GptModelConfig::getNbKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig11getNbLayersE8SizeType">tensorrt_llm::runtime::GptModelConfig::getNbLayers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig19getPagedContextFMHAEv">tensorrt_llm::runtime::GptModelConfig::getPagedContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getQuantModeEv">tensorrt_llm::runtime::GptModelConfig::getQuantMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14getSizePerHeadEv">tensorrt_llm::runtime::GptModelConfig::getSizePerHead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig17getTokensPerBlockEv">tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig12getVocabSizeEv">tensorrt_llm::runtime::GptModelConfig::getVocabSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18getVocabSizePaddedE8SizeType">tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14GptModelConfigE8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::GptModelConfig::GptModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14hasMambaConfigEv">tensorrt_llm::runtime::GptModelConfig::hasMambaConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig10isSsmBasedEv">tensorrt_llm::runtime::GptModelConfig::isSsmBased (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18isTransformerBasedEv">tensorrt_llm::runtime::GptModelConfig::isTransformerBased (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mComputeContextLogitsE">tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig24mComputeGenerationLogitsE">tensorrt_llm::runtime::GptModelConfig::mComputeGenerationLogits (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mDataTypeE">tensorrt_llm::runtime::GptModelConfig::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mHiddenSizeE">tensorrt_llm::runtime::GptModelConfig::mHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mInputPackedE">tensorrt_llm::runtime::GptModelConfig::mInputPacked (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mLoraModulesE">tensorrt_llm::runtime::GptModelConfig::mLoraModules (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMambaConfigE">tensorrt_llm::runtime::GptModelConfig::mMambaConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBatchSizeE">tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxBeamWidthE">tensorrt_llm::runtime::GptModelConfig::mMaxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxDraftLenE">tensorrt_llm::runtime::GptModelConfig::mMaxDraftLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxInputLenE">tensorrt_llm::runtime::GptModelConfig::mMaxInputLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mMaxLoraRankE">tensorrt_llm::runtime::GptModelConfig::mMaxLoraRank (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMaxNumTokensE">tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mMaxPromptEmbeddingTableSizeE">tensorrt_llm::runtime::GptModelConfig::mMaxPromptEmbeddingTableSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mMaxSequenceLenE">tensorrt_llm::runtime::GptModelConfig::mMaxSequenceLen (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mMedusaModuleE">tensorrt_llm::runtime::GptModelConfig::mMedusaModule (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mMlpHiddenSizeE">tensorrt_llm::runtime::GptModelConfig::mMlpHiddenSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mModelVariantE">tensorrt_llm::runtime::GptModelConfig::mModelVariant (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig8mNbHeadsE">tensorrt_llm::runtime::GptModelConfig::mNbHeads (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mNbKvHeadsE">tensorrt_llm::runtime::GptModelConfig::mNbKvHeads (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig9mNbLayersE">tensorrt_llm::runtime::GptModelConfig::mNbLayers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariantE">tensorrt_llm::runtime::GptModelConfig::ModelVariant (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGlmE">tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant4kGptE">tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12ModelVariant6kMambaE">tensorrt_llm::runtime::GptModelConfig::ModelVariant::kMamba (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig17mPagedContextFMHAE">tensorrt_llm::runtime::GptModelConfig::mPagedContextFMHA (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13mPagedKvCacheE">tensorrt_llm::runtime::GptModelConfig::mPagedKvCache (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig11mPagedStateE">tensorrt_llm::runtime::GptModelConfig::mPagedState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mQuantModeE">tensorrt_llm::runtime::GptModelConfig::mQuantMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12mSizePerHeadE">tensorrt_llm::runtime::GptModelConfig::mSizePerHead (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15mTokensPerBlockE">tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig28mUseContextFMHAForGenerationE">tensorrt_llm::runtime::GptModelConfig::mUseContextFMHAForGeneration (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig19mUseCustomAllReduceE">tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig22mUseGptAttentionPluginE">tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14mUseLoraPluginE">tensorrt_llm::runtime::GptModelConfig::mUseLoraPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig21mUseMambaConv1dPluginE">tensorrt_llm::runtime::GptModelConfig::mUseMambaConv1dPlugin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig10mVocabSizeE">tensorrt_llm::runtime::GptModelConfig::mVocabSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE">tensorrt_llm::runtime::GptModelConfig::setLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMambaConfigERK11MambaConfig">tensorrt_llm::runtime::GptModelConfig::setMambaConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBatchSizeE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxBeamWidthE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxBeamWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxDraftLenE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxInputLenE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxInputLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setMaxLoraRankE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxLoraRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMaxNumTokensENSt8optionalI8SizeTypeEE">tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setMaxPromptEmbeddingTableSizeE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setMaxSequenceLenE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMaxSequenceLen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setMedusaModuleERK12MedusaModule">tensorrt_llm::runtime::GptModelConfig::setMedusaModule (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig16setMlpHiddenSizeE8SizeType">tensorrt_llm::runtime::GptModelConfig::setMlpHiddenSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15setModelVariantE12ModelVariant">tensorrt_llm::runtime::GptModelConfig::setModelVariant (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setNbKvHeadsE8SizeType">tensorrt_llm::runtime::GptModelConfig::setNbKvHeads (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig19setPagedContextFMHAEb">tensorrt_llm::runtime::GptModelConfig::setPagedContextFMHA (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig12setQuantModeEN6common9QuantModeE">tensorrt_llm::runtime::GptModelConfig::setQuantMode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14setSizePerHeadE8SizeType">tensorrt_llm::runtime::GptModelConfig::setSizePerHead (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig17setTokensPerBlockE8SizeType">tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig30setUseContextFMHAForGenerationEb">tensorrt_llm::runtime::GptModelConfig::setUseContextFMHAForGeneration (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig24supportsInflightBatchingEv">tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEb">tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig18useCustomAllReduceEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEb">tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig21useGptAttentionPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEb">tensorrt_llm::runtime::GptModelConfig::useLoraPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13useLoraPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEb">tensorrt_llm::runtime::GptModelConfig::useMambaConv1dPlugin (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig20useMambaConv1dPluginEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig9useMedusaEv">tensorrt_llm::runtime::GptModelConfig::useMedusa (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig14usePackedInputEb">tensorrt_llm::runtime::GptModelConfig::usePackedInput (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig14usePackedInputEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEb">tensorrt_llm::runtime::GptModelConfig::usePagedKvCache (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePagedKvCacheEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14GptModelConfig13usePagedStateEb">tensorrt_llm::runtime::GptModelConfig::usePagedState (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig13usePagedStateEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14GptModelConfig15usePromptTuningEv">tensorrt_llm::runtime::GptModelConfig::usePromptTuning (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSessionE">tensorrt_llm::runtime::GptSession (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6ConfigE">tensorrt_llm::runtime::GptSession::Config (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config6ConfigE8SizeType8SizeType8SizeType">tensorrt_llm::runtime::GptSession::Config::Config (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17ctxMicroBatchSizeE">tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config13cudaGraphModeE">tensorrt_llm::runtime::GptSession::Config::cudaGraphMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17decoderPerRequestE">tensorrt_llm::runtime::GptSession::Config::decoderPerRequest (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12decodingModeE">tensorrt_llm::runtime::GptSession::Config::decodingMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17genMicroBatchSizeE">tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config13kvCacheConfigE">tensorrt_llm::runtime::GptSession::Config::kvCacheConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBatchSizeE">tensorrt_llm::runtime::GptSession::Config::maxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config12maxBeamWidthE">tensorrt_llm::runtime::GptSession::Config::maxBeamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17maxSequenceLengthE">tensorrt_llm::runtime::GptSession::Config::maxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession6Config17normalizeLogProbsE">tensorrt_llm::runtime::GptSession::Config::normalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13createBuffersE8SizeType">tensorrt_llm::runtime::GptSession::createBuffers (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14createContextsEv">tensorrt_llm::runtime::GptSession::createContexts (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession30createCustomAllReduceWorkspaceE8SizeType8SizeType8SizeType">tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14createDecodersE8SizeType8SizeType8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeEb8SizeTypeRK12DecodingMode">tensorrt_llm::runtime::GptSession::createDecoders (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession20createKvCacheManagerE8SizeType8SizeType8SizeType8SizeType8SizeTypeRK13KvCacheConfig">tensorrt_llm::runtime::GptSession::createKvCacheManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession30createOnTokenGeneratedCallbackER16GenerationOutput">tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorE">tensorrt_llm::runtime::GptSession::CudaGraphExecutor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor5clearEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6createERK11cudaGraph_t">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor17CudaGraphExecutorEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor11hasInstanceEv">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6launchERK10CudaStream">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor9mInstanceE">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor16prepareNextGraphERK11TllmRuntime8SizeType">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor6updateERK11cudaGraph_t">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutor14uploadToStreamERK10CudaStream">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17CudaGraphExecutorD0Ev">tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16decoderStepAsyncE8SizeType8SizeType">tensorrt_llm::runtime::GptSession::decoderStepAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18executeContextStepERKNSt6vectorI15GenerationInputEERKNSt6vectorI8SizeTypeEEPK14KvCacheManager">tensorrt_llm::runtime::GptSession::executeContextStep (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession21executeGenerationStepE8SizeTypeRKNSt6vectorI15GenerationInputEERNSt6vectorI16GenerationOutputEERKNSt6vectorI8SizeTypeEEP14KvCacheManagerRNSt6vectorIbEE">tensorrt_llm::runtime::GptSession::executeGenerationStep (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8finalizeE8SizeType">tensorrt_llm::runtime::GptSession::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8generateER16GenerationOutputRK15GenerationInputRK14SamplingConfigKNSt10shared_ptrI18GenerationProfilerEE">tensorrt_llm::runtime::GptSession::generate (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15generateBatchedERNSt6vectorI16GenerationOutputEERKNSt6vectorI15GenerationInputEERK14SamplingConfigRK22TokenGeneratedCallbackKNSt10shared_ptrI18GenerationProfilerEE">tensorrt_llm::runtime::GptSession::generateBatched (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfilerE">tensorrt_llm::runtime::GptSession::GenerationProfiler (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler3endE">tensorrt_llm::runtime::GptSession::GenerationProfiler::end (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5flagsE">tensorrt_llm::runtime::GptSession::GenerationProfiler::flags (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler18GenerationProfilerEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::GenerationProfiler (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler16getElapsedTimeMsEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getElapsedTimeMs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler6getEndEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getEnd (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession18GenerationProfiler8getStartEv">tensorrt_llm::runtime::GptSession::GenerationProfiler::getStart (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18GenerationProfiler5startE">tensorrt_llm::runtime::GptSession::GenerationProfiler::start (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16getBufferManagerEv">tensorrt_llm::runtime::GptSession::getBufferManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession9getDeviceEv">tensorrt_llm::runtime::GptSession::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession9getLoggerEv">tensorrt_llm::runtime::GptSession::getLogger (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16getLogitDataTypeEv">tensorrt_llm::runtime::GptSession::getLogitDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession14getModelConfigEv">tensorrt_llm::runtime::GptSession::getModelConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession20getNormalizeLogProbsEv">tensorrt_llm::runtime::GptSession::getNormalizeLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession14getWorldConfigEv">tensorrt_llm::runtime::GptSession::getWorldConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigPKvNSt6size_tE9LoggerPtr">tensorrt_llm::runtime::GptSession::GptSession (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6stringE9LoggerPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10GptSessionERK6ConfigRK14GptModelConfigRK11WorldConfigRKNSt6vectorI7uint8_tEE9LoggerPtr">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession11initDecoderER7ITensorRK15GenerationInputRK16GenerationOutputRK14SamplingConfig8SizeType">tensorrt_llm::runtime::GptSession::initDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession19kvCacheAddSequencesE8SizeType8SizeType8SizeType">tensorrt_llm::runtime::GptSession::kvCacheAddSequences (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13KvCacheConfigE">tensorrt_llm::runtime::GptSession::KvCacheConfig (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14KvCacheManagerE">tensorrt_llm::runtime::GptSession::KvCacheManager (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9LoggerPtrE">tensorrt_llm::runtime::GptSession::LoggerPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8mBuffersE">tensorrt_llm::runtime::GptSession::mBuffers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession10mCommEventE">tensorrt_llm::runtime::GptSession::mCommEvent (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9mCommPtrsE">tensorrt_llm::runtime::GptSession::mCommPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession11mCommStreamE">tensorrt_llm::runtime::GptSession::mCommStream (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession19mCudaGraphInstancesE">tensorrt_llm::runtime::GptSession::mCudaGraphInstances (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14mCudaGraphModeE">tensorrt_llm::runtime::GptSession::mCudaGraphMode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession26mDecoderMaxAttentionWindowE">tensorrt_llm::runtime::GptSession::mDecoderMaxAttentionWindow (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession25mDecoderMaxSequenceLengthE">tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9mDecodersE">tensorrt_llm::runtime::GptSession::mDecoders (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession23mDecoderSinkTokenLengthE">tensorrt_llm::runtime::GptSession::mDecoderSinkTokenLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession7mDeviceE">tensorrt_llm::runtime::GptSession::mDevice (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfigE">tensorrt_llm::runtime::GptSession::MicroBatchConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12ctxBatchSizeE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig12genBatchSizeE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig13getGenGraphIdE8SizeType8SizeType">tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenGraphId (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigE8SizeType8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig16MicroBatchConfigEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numCtxBatchesE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10GptSession16MicroBatchConfig12numCtxPerGenEv">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession16MicroBatchConfig13numGenBatchesE">tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17mIpcMemoryHandlesE">tensorrt_llm::runtime::GptSession::mIpcMemoryHandles (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15mKvCacheManagerE">tensorrt_llm::runtime::GptSession::mKvCacheManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession7mLoggerE">tensorrt_llm::runtime::GptSession::mLogger (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession17mMicroBatchConfigE">tensorrt_llm::runtime::GptSession::mMicroBatchConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession12mModelConfigE">tensorrt_llm::runtime::GptSession::mModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession18mNormalizeLogProbsE">tensorrt_llm::runtime::GptSession::mNormalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13mPipelineCommE">tensorrt_llm::runtime::GptSession::mPipelineComm (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession15mReceivedEventsE">tensorrt_llm::runtime::GptSession::mReceivedEvents (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession8mRuntimeE">tensorrt_llm::runtime::GptSession::mRuntime (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession12mWorldConfigE">tensorrt_llm::runtime::GptSession::mWorldConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession5setupERK6Config">tensorrt_llm::runtime::GptSession::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession14shouldStopSyncE8SizeType8SizeType8SizeType">tensorrt_llm::runtime::GptSession::shouldStopSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession9TensorPtrE">tensorrt_llm::runtime::GptSession::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession22TokenGeneratedCallbackE">tensorrt_llm::runtime::GptSession::TokenGeneratedCallback (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10GptSession13useCudaGraphsEv">tensorrt_llm::runtime::GptSession::useCudaGraphs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferE">tensorrt_llm::runtime::IBuffer (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">tensorrt_llm::runtime::IBuffer::data (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE">tensorrt_llm::runtime::IBuffer::DataType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv">tensorrt_llm::runtime::IBuffer::getCapacity (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv">tensorrt_llm::runtime::IBuffer::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv">tensorrt_llm::runtime::IBuffer::getDataTypeName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv">tensorrt_llm::runtime::IBuffer::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv">tensorrt_llm::runtime::IBuffer::getMemoryTypeName (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv">tensorrt_llm::runtime::IBuffer::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv">tensorrt_llm::runtime::IBuffer::getSizeInBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer">tensorrt_llm::runtime::IBuffer::IBuffer (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv">tensorrt_llm::runtime::IBuffer::memoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer">tensorrt_llm::runtime::IBuffer::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv">tensorrt_llm::runtime::IBuffer::release (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE">tensorrt_llm::runtime::IBuffer::resize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE">tensorrt_llm::runtime::IBuffer::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE">tensorrt_llm::runtime::IBuffer::SharedPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE">tensorrt_llm::runtime::IBuffer::toBytes (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE">tensorrt_llm::runtime::IBuffer::UniqueConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE">tensorrt_llm::runtime::IBuffer::UniquePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::IBuffer::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE">tensorrt_llm::runtime::IBuffer::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev">tensorrt_llm::runtime::IBuffer::~IBuffer (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderE">tensorrt_llm::runtime::IGptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder22acceptDraftTokensByIdsERK7ITensorRK7ITensorRK7ITensorRK7ITensorR7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorRKN13BufferManager13CudaStreamPtrE">tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder25acceptDraftTokensByLogitsER7ITensorRK7ITensorR7ITensorR7ITensorRK7ITensorR7ITensorRK7ITensor8SizeType8SizeTypebfP13curandState_tRKN13BufferManager13CudaStreamPtrE">tensorrt_llm::runtime::IGptDecoder::acceptDraftTokensByLogits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERK12DecodingModeN8nvinfer18DataTypeE6size_t6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrENSt8optionalIN7runtime8SizeTypeEEENSt8optionalIN7runtime8SizeTypeEEE">tensorrt_llm::runtime::IGptDecoder::create (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder7forwardER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput">tensorrt_llm::runtime::IGptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder10gatherTreeER7ITensorRK14DecodingOutputRK13DecodingInputRK13BufferManager">tensorrt_llm::runtime::IGptDecoder::gatherTree (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv">tensorrt_llm::runtime::IGptDecoder::getSamplingConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_t8SizeTypeRKNSt8optionalI9TensorPtrEE">tensorrt_llm::runtime::IGptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE">tensorrt_llm::runtime::IGptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoder34updateKVCacheBasedOnAcceptedTokensERK7ITensorRK7ITensorRK7ITensorRK7ITensorRK14GptModelConfigRK11WorldConfigN13BufferManager13CudaStreamPtrE8SizeType8SizeType8SizeTypeN8nvinfer18DataTypeE">tensorrt_llm::runtime::IGptDecoder::updateKVCacheBasedOnAcceptedTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev">tensorrt_llm::runtime::IGptDecoder::~IGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatchE">tensorrt_llm::runtime::IGptDecoderBatch (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch13CudaStreamPtrE">tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch8finalizeE8SizeType">tensorrt_llm::runtime::IGptDecoderBatch::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch7forwardERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatch::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch12forwardAsyncERN13decoder_batch6OutputERKN13decoder_batch5InputE">tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11forwardSyncERKN13decoder_batch5TokenE">tensorrt_llm::runtime::IGptDecoderBatch::forwardSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsE8SizeType">tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch14getCumLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getFinishedEv">tensorrt_llm::runtime::IGptDecoderBatch::getFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsE8SizeType">tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch11getLogProbsEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch30getMedusaAcceptedLengthsCumSumEv">tensorrt_llm::runtime::IGptDecoderBatch::getMedusaAcceptedLengthsCumSum (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch28getMedusaAcceptedPackedPathsEv">tensorrt_llm::runtime::IGptDecoderBatch::getMedusaAcceptedPackedPaths (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch10getNbStepsEv">tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch18getNextDraftTokensEv">tensorrt_llm::runtime::IGptDecoderBatch::getNextDraftTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getOutputIdsE8SizeType">tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime16IGptDecoderBatch12getParentIdsEv">tensorrt_llm::runtime::IGptDecoderBatch::getParentIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch16IGptDecoderBatchEv">tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch11newRequestsERKNSt6vectorI8SizeTypeEERKNSt6vectorIN13decoder_batch7RequestEEERKNSt6vectorI14SamplingConfigEE">tensorrt_llm::runtime::IGptDecoderBatch::newRequests (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch9TensorPtrE">tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16IGptDecoderBatch8TokenPtrE">tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryE">tensorrt_llm::runtime::IpcMemory (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryEv">tensorrt_llm::runtime::IpcMemory::allocateIpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv">tensorrt_llm::runtime::IpcMemory::destroyIpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE">tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9IpcMemory17getCommPtrsTensorEv">tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK11WorldConfigNSt6size_tE">tensorrt_llm::runtime::IpcMemory::IpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory10mBufferPtrE">tensorrt_llm::runtime::IpcMemory::mBufferPtr (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory11mBufferSizeE">tensorrt_llm::runtime::IpcMemory::mBufferSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE">tensorrt_llm::runtime::IpcMemory::mCommPtrs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory12mWorldConfigE">tensorrt_llm::runtime::IpcMemory::mWorldConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemory9TensorPtrE">tensorrt_llm::runtime::IpcMemory::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev">tensorrt_llm::runtime::IpcMemory::~IpcMemory (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderE">tensorrt_llm::runtime::IStatefulGptDecoder (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder13CudaStreamPtrE">tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder8finalizeEv">tensorrt_llm::runtime::IStatefulGptDecoder::finalize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder7forwardERN7decoder6OutputERKN7decoder5InputE">tensorrt_llm::runtime::IStatefulGptDecoder::forward (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder12forwardAsyncERN7decoder6OutputERKN7decoder5InputE">tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder11forwardSyncEv">tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder15getAllNewTokensEv">tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder14getCumLogProbsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder11getLogProbsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder13getNbFinishedEv">tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getNewTokensE8SizeType">tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime19IStatefulGptDecoder12getOutputIdsEv">tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder19IStatefulGptDecoderEv">tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder8newBatchERK15GenerationInputRK16GenerationOutputRK14SamplingConfig">tensorrt_llm::runtime::IStatefulGptDecoder::newBatch (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder5setupERK12DecodingMode8SizeType8SizeType8SizeType8SizeType8SizeType8SizeTypebN8nvinfer18DataTypeERK14GptModelConfig">tensorrt_llm::runtime::IStatefulGptDecoder::setup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoder9TensorPtrE">tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19IStatefulGptDecoderD0Ev">tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorE">tensorrt_llm::runtime::ITensor (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t">tensorrt_llm::runtime::ITensor::castSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7DimTypeE">tensorrt_llm::runtime::ITensor::DimType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv">tensorrt_llm::runtime::ITensor::getShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor">tensorrt_llm::runtime::ITensor::ITensor (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI8SizeTypeEE">tensorrt_llm::runtime::ITensor::makeShape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor">tensorrt_llm::runtime::ITensor::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape">tensorrt_llm::runtime::ITensor::reshape (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE">tensorrt_llm::runtime::ITensor::resize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE">tensorrt_llm::runtime::ITensor::Shape (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T8SizeType">tensorrt_llm::runtime::ITensor::shapeEquals (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T8SizeType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI8SizeTypeEE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE">tensorrt_llm::runtime::ITensor::SharedConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE">tensorrt_llm::runtime::ITensor::SharedPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE">tensorrt_llm::runtime::ITensor::slice (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE">[3]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE8SizeType">tensorrt_llm::runtime::ITensor::squeeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape8SizeType">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape">tensorrt_llm::runtime::ITensor::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE">tensorrt_llm::runtime::ITensor::UniqueConstPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE">tensorrt_llm::runtime::ITensor::UniquePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE8SizeType">tensorrt_llm::runtime::ITensor::unsqueeze (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape8SizeType">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape">tensorrt_llm::runtime::ITensor::view (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape">tensorrt_llm::runtime::ITensor::volume (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape">tensorrt_llm::runtime::ITensor::volumeNonNegative (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape">tensorrt_llm::runtime::ITensor::wrap (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE">[4]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev">tensorrt_llm::runtime::ITensor::~ITensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCacheE">tensorrt_llm::runtime::LoraCache (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType">tensorrt_llm::runtime::LoraCache::bump (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType">tensorrt_llm::runtime::LoraCache::bumpTaskInProgress (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE8SizeType">tensorrt_llm::runtime::LoraCache::claimPagesWithEvict (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb">tensorrt_llm::runtime::LoraCache::copyTask (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache">tensorrt_llm::runtime::LoraCache::copyTaskMapPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK14GptModelConfigRK11WorldConfigNSt13unordered_mapI8SizeType10LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCache::copyToPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType">tensorrt_llm::runtime::LoraCache::determineNumPages (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr">tensorrt_llm::runtime::LoraCache::fits (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType">tensorrt_llm::runtime::LoraCache::get (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv">tensorrt_llm::runtime::LoraCache::getNumPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t">tensorrt_llm::runtime::LoraCache::getPagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType">tensorrt_llm::runtime::LoraCache::getStatus (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType">tensorrt_llm::runtime::LoraCache::has (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::isDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType">tensorrt_llm::runtime::LoraCache::isLoaded (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr">tensorrt_llm::runtime::LoraCache::loadWeights (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK14GptModelConfigRK11WorldConfigRK13BufferManager">tensorrt_llm::runtime::LoraCache::LoraCache (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv">tensorrt_llm::runtime::LoraCache::markAllDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType">tensorrt_llm::runtime::LoraCache::markTaskDone (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE">tensorrt_llm::runtime::LoraCache::mBufferManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE">tensorrt_llm::runtime::LoraCache::mCacheMap (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE">tensorrt_llm::runtime::LoraCache::mCacheMutex (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE">tensorrt_llm::runtime::LoraCache::mCachePageManager (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE">tensorrt_llm::runtime::LoraCache::mDeviceBufferManagers (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE">tensorrt_llm::runtime::LoraCache::mDoneTasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE">tensorrt_llm::runtime::LoraCache::mInProgressTasks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE">tensorrt_llm::runtime::LoraCache::mModelConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE">tensorrt_llm::runtime::LoraCache::mModuleIdToModule (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE">tensorrt_llm::runtime::LoraCache::mPageManagerConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE">tensorrt_llm::runtime::LoraCache::mPagesMutex (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE">tensorrt_llm::runtime::LoraCache::mWorldConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb">tensorrt_llm::runtime::LoraCache::put (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor8SizeType8SizeType">tensorrt_llm::runtime::LoraCache::splitTransposeCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor8SizeType8SizeType">tensorrt_llm::runtime::LoraCache::splitTransposeCpuInner (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE">tensorrt_llm::runtime::LoraCache::TaskIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::adapterSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::inSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::layerId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::moduleId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::numSlots (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::outSize (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::pageId (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::slotIdx (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsInPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfig::weightsOutPointer (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE">tensorrt_llm::runtime::LoraCache::TaskLayerModuleConfigListPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE">tensorrt_llm::runtime::LoraCache::TaskValue (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE">tensorrt_llm::runtime::LoraCache::TaskValue::configs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE">tensorrt_llm::runtime::LoraCache::TaskValue::done (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::inProgress (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE">tensorrt_llm::runtime::LoraCache::TaskValue::it (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE">tensorrt_llm::runtime::LoraCache::TaskValue::loaded (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE">tensorrt_llm::runtime::LoraCache::TaskValue::loadInProgress (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue">tensorrt_llm::runtime::LoraCache::TaskValue::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE">tensorrt_llm::runtime::LoraCache::TaskValue::pageIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb">tensorrt_llm::runtime::LoraCache::TaskValue::TaskValue (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev">tensorrt_llm::runtime::LoraCache::TaskValue::~TaskValue (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE">tensorrt_llm::runtime::LoraCache::TaskValuePtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE">tensorrt_llm::runtime::LoraCache::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE">tensorrt_llm::runtime::LoraCache::ValueStatus (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_LOADED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_MISSING (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE">tensorrt_llm::runtime::LoraCache::ValueStatus::kVALUE_STATUS_PROCESSING (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE">tensorrt_llm::runtime::LoraCachePageManager (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE8SizeType">tensorrt_llm::runtime::LoraCachePageManager::blockPtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE8SizeType">tensorrt_llm::runtime::LoraCachePageManager::claimPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::initialize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager">tensorrt_llm::runtime::LoraCachePageManager::LoraCachePageManager (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE">tensorrt_llm::runtime::LoraCachePageManager::mConfig (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE">tensorrt_llm::runtime::LoraCachePageManager::mFreePageIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE">tensorrt_llm::runtime::LoraCachePageManager::mIsPageFree (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE">tensorrt_llm::runtime::LoraCachePageManager::mPageBlocks (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::mutablePagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv">tensorrt_llm::runtime::LoraCachePageManager::numAvailablePages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE">tensorrt_llm::runtime::LoraCachePageManager::pagePtr (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE">tensorrt_llm::runtime::LoraCachePageManager::releasePages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE">tensorrt_llm::runtime::LoraCachePageManager::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE">tensorrt_llm::runtime::LoraCachePageManagerConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getInitToZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMaxPagesPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getPageWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getSlotsPerPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv">tensorrt_llm::runtime::LoraCachePageManagerConfig::getTotalNumPages (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE8SizeType8SizeType8SizeType8SizeType8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::LoraCachePageManagerConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mDataType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mInitToZero (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMaxPagesPerBlock (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mMemoryType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mNumCopyStreams (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mPageWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mSlotsPerPage (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE">tensorrt_llm::runtime::LoraCachePageManagerConfig::mTotalNumPages (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setDataType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb">tensorrt_llm::runtime::LoraCachePageManagerConfig::setInitToZero (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMaxPagesPerBlock (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE">tensorrt_llm::runtime::LoraCachePageManagerConfig::setMemoryType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::setNumCopyStreams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::setPageWidth (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::setSlotsPerPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK8SizeType">tensorrt_llm::runtime::LoraCachePageManagerConfig::setTotalNumPage (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleE">tensorrt_llm::runtime::LoraModule (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE8SizeType8SizeType8SizeType8SizeType8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::createLoraModules (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE8SizeType">tensorrt_llm::runtime::LoraModule::flattenedInOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv">tensorrt_llm::runtime::LoraModule::inDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv">tensorrt_llm::runtime::LoraModule::inDimFirst (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE8SizeType">tensorrt_llm::runtime::LoraModule::inSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv">tensorrt_llm::runtime::LoraModule::inTpSplitDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::localInAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE8SizeType">tensorrt_llm::runtime::LoraModule::localInDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::localInOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::localInSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::localOutAdapterSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE8SizeType">tensorrt_llm::runtime::LoraModule::localOutDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE8SizeType8SizeType">tensorrt_llm::runtime::LoraModule::localOutSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule">tensorrt_llm::runtime::LoraModule::LoraModule (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType8SizeType8SizeTypebb8SizeType8SizeType">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE">tensorrt_llm::runtime::LoraModule::mInDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE">tensorrt_llm::runtime::LoraModule::mInDimFirst (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE">tensorrt_llm::runtime::LoraModule::mInTpSplitDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE">tensorrt_llm::runtime::LoraModule::ModuleType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_DENSE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_K (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_Q (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_QKV (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kATTN_V (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_DENSE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_K (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_Q (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_QKV (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE">tensorrt_llm::runtime::LoraModule::ModuleType::kCROSS_ATTN_V (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE">tensorrt_llm::runtime::LoraModule::ModuleType::kINVALID (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_4H_TO_H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_GATE (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE">tensorrt_llm::runtime::LoraModule::ModuleType::kMLP_H_TO_4H (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE">tensorrt_llm::runtime::LoraModule::mOutDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE">tensorrt_llm::runtime::LoraModule::mOutDimFirst (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE">tensorrt_llm::runtime::LoraModule::mOutTpSplitDim (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE">tensorrt_llm::runtime::LoraModule::mType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv">tensorrt_llm::runtime::LoraModule::name (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule">tensorrt_llm::runtime::LoraModule::operator= (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv">tensorrt_llm::runtime::LoraModule::outDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv">tensorrt_llm::runtime::LoraModule::outDimFirst (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE8SizeType">tensorrt_llm::runtime::LoraModule::outSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv">tensorrt_llm::runtime::LoraModule::outTpSplitDim (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE">tensorrt_llm::runtime::LoraModule::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType">tensorrt_llm::runtime::LoraModule::toModuleName (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE8SizeType">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE">tensorrt_llm::runtime::LoraModule::toModuleType (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv">tensorrt_llm::runtime::LoraModule::value (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11MambaConfigE">tensorrt_llm::runtime::MambaConfig (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11MambaConfig5dConvE">tensorrt_llm::runtime::MambaConfig::dConv (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11MambaConfig6dStateE">tensorrt_llm::runtime::MambaConfig::dState (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11MambaConfig6expandE">tensorrt_llm::runtime::MambaConfig::expand (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCountersE">tensorrt_llm::runtime::MemoryCounters (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv8SizeType">tensorrt_llm::runtime::MemoryCounters::allocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType8SizeType">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei">tensorrt_llm::runtime::MemoryCounters::bytesToString (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8SizeTypei">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv8SizeType">tensorrt_llm::runtime::MemoryCounters::deallocate (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType8SizeType">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE">tensorrt_llm::runtime::MemoryCounters::DiffType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv">tensorrt_llm::runtime::MemoryCounters::getCpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getCpuDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv">tensorrt_llm::runtime::MemoryCounters::getGpu (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv">tensorrt_llm::runtime::MemoryCounters::getGpuDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv">tensorrt_llm::runtime::MemoryCounters::getInstance (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv">tensorrt_llm::runtime::MemoryCounters::getPinned (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv">tensorrt_llm::runtime::MemoryCounters::getPinnedDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv">tensorrt_llm::runtime::MemoryCounters::getUVM (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv">tensorrt_llm::runtime::MemoryCounters::getUVMDiff (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE">tensorrt_llm::runtime::MemoryCounters::mCpu (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE">tensorrt_llm::runtime::MemoryCounters::mCpuDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv">tensorrt_llm::runtime::MemoryCounters::MemoryCounters (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE">tensorrt_llm::runtime::MemoryCounters::mGpu (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE">tensorrt_llm::runtime::MemoryCounters::mGpuDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE">tensorrt_llm::runtime::MemoryCounters::mPinned (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE">tensorrt_llm::runtime::MemoryCounters::mPinnedDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME">tensorrt_llm::runtime::MemoryCounters::mUVM (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE">tensorrt_llm::runtime::MemoryCounters::mUVMDiff (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14MemoryCounters8SizeTypeE">tensorrt_llm::runtime::MemoryCounters::SizeType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv">tensorrt_llm::runtime::MemoryCounters::toString (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryTypeE">tensorrt_llm::runtime::MemoryType (C++ enum)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE">tensorrt_llm::runtime::MemoryType::kCPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE">tensorrt_llm::runtime::MemoryType::kGPU (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE">tensorrt_llm::runtime::MemoryType::kPINNED (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME">tensorrt_llm::runtime::MemoryType::kUVM (C++ enumerator)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE">tensorrt_llm::runtime::MemoryTypeString (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kCPU&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kGPU&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kPINNED&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE">tensorrt_llm::runtime::MemoryTypeString&lt;MemoryType::kUVM&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule">tensorrt_llm::runtime::operator&lt;&lt; (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE">[5]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">tensorrt_llm::runtime::PhonyNameDueToError::name (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4nameE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">tensorrt_llm::runtime::PhonyNameDueToError::size (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4sizeE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">tensorrt_llm::runtime::PhonyNameDueToError::type (C++ type)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError4typeE">[8]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">tensorrt_llm::runtime::PhonyNameDueToError::value (C++ member)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[2]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[3]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[4]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[5]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[6]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[7]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[8]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime19PhonyNameDueToError5valueE">[9]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE">tensorrt_llm::runtime::PointerElementType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE">tensorrt_llm::runtime::PromptTuningParams (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtrK8SizeTypeK8SizeTypeRKNSt6vectorI8SizeTypeEERKNSt6vectorI8SizeTypeEERK13BufferManagerb">tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr">tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams8SizeTypeE">tensorrt_llm::runtime::PromptTuningParams::SizeType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE">tensorrt_llm::runtime::PromptTuningParams::TensorPtr (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfigE">tensorrt_llm::runtime::SamplingConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE">tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE">tensorrt_llm::runtime::SamplingConfig::beamWidth (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE">tensorrt_llm::runtime::SamplingConfig::draftAcceptanceThreshold (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE">tensorrt_llm::runtime::SamplingConfig::earlyStopping (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE">tensorrt_llm::runtime::SamplingConfig::FloatType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE">tensorrt_llm::runtime::SamplingConfig::frequencyPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE8SizeTypeEEE">tensorrt_llm::runtime::SamplingConfig::fuseValues (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE">tensorrt_llm::runtime::SamplingConfig::lengthPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE">tensorrt_llm::runtime::SamplingConfig::minLength (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE">tensorrt_llm::runtime::SamplingConfig::normalizeLogProbs (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig">tensorrt_llm::runtime::SamplingConfig::operator== (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE">tensorrt_llm::runtime::SamplingConfig::OptVec (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE">tensorrt_llm::runtime::SamplingConfig::presencePenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE">tensorrt_llm::runtime::SamplingConfig::randomSeed (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE">tensorrt_llm::runtime::SamplingConfig::repetitionPenalty (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE8SizeType">tensorrt_llm::runtime::SamplingConfig::SamplingConfig (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25SpeculativeDecodingConfigEEE">[1]</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE">[2]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE">tensorrt_llm::runtime::SamplingConfig::temperature (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE">tensorrt_llm::runtime::SamplingConfig::topK (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE">tensorrt_llm::runtime::SamplingConfig::topKMedusaHeads (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE">tensorrt_llm::runtime::SamplingConfig::topP (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE">tensorrt_llm::runtime::SamplingConfig::topPDecay (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE">tensorrt_llm::runtime::SamplingConfig::topPMin (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE">tensorrt_llm::runtime::SamplingConfig::topPResetIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig3VecE">tensorrt_llm::runtime::SamplingConfig::Vec (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime13setPeerAccessERK11WorldConfigb">tensorrt_llm::runtime::setPeerAccess (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime8SizeTypeE">tensorrt_llm::runtime::SizeType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE">tensorrt_llm::runtime::StringPtrMap (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLoggerE">tensorrt_llm::runtime::TllmLogger (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv">tensorrt_llm::runtime::TllmLogger::getLevel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE">tensorrt_llm::runtime::TllmLogger::log (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity">tensorrt_llm::runtime::TllmLogger::setLevel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig">tensorrt_llm::runtime::to_string (C++ function)</a>, <a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE">[1]</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE">tensorrt_llm::runtime::TokenIdType (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE">tensorrt_llm::runtime::TRTDataType (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE">tensorrt_llm::runtime::TRTDataType&lt;bool&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE">tensorrt_llm::runtime::TRTDataType&lt;bool&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE">tensorrt_llm::runtime::TRTDataType&lt;float&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE">tensorrt_llm::runtime::TRTDataType&lt;float&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE">tensorrt_llm::runtime::TRTDataType&lt;half&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE">tensorrt_llm::runtime::TRTDataType&lt;half&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int32_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7int64_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt6int8_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::int8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint32_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint32_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt8uint64_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint64_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEEE">tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeINSt7uint8_tEE5valueE">tensorrt_llm::runtime::TRTDataType&lt;std::uint8_t&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4I0EN12tensorrt_llm7runtime11TRTDataTypeIP1TEE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE15kUnderlyingTypeE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::kUnderlyingType (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIP1TE5valueE">tensorrt_llm::runtime::TRTDataType&lt;T*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIPvEE">tensorrt_llm::runtime::TRTDataType&lt;void*&gt; (C++ struct)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIPvE5valueE">tensorrt_llm::runtime::TRTDataType&lt;void*&gt;::value (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime5utilsE">tensorrt_llm::runtime::utils (C++ type)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime5utils10loadEngineERKNSt6stringE">tensorrt_llm::runtime::utils::loadEngine (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfigE">tensorrt_llm::runtime::WorldConfig (C++ class)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig9getDeviceEv">tensorrt_llm::runtime::WorldConfig::getDevice (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig15getGpusPerGroupEv">tensorrt_llm::runtime::WorldConfig::getGpusPerGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14getGpusPerNodeEv">tensorrt_llm::runtime::WorldConfig::getGpusPerNode (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig11getLastRankEv">tensorrt_llm::runtime::WorldConfig::getLastRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig24getPipelineParallelGroupEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig22getPipelineParallelismEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig23getPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getRankEv">tensorrt_llm::runtime::WorldConfig::getRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig7getSizeEv">tensorrt_llm::runtime::WorldConfig::getSize (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig20getTensorParallelismEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelism (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig21getTensorParallelRankEv">tensorrt_llm::runtime::WorldConfig::getTensorParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig27isFirstPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig26isLastPipelineParallelRankEv">tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig18isPipelineParallelEv">tensorrt_llm::runtime::WorldConfig::isPipelineParallel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig16isTensorParallelEv">tensorrt_llm::runtime::WorldConfig::isTensorParallel (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig19kDefaultGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig10mDeviceIdsE">tensorrt_llm::runtime::WorldConfig::mDeviceIds (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig12mGpusPerNodeE">tensorrt_llm::runtime::WorldConfig::mGpusPerNode (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig3mpiE8SizeTypeNSt8optionalI8SizeTypeEENSt8optionalI8SizeTypeEERKNSt8optionalINSt6vectorI8SizeTypeEEEE">tensorrt_llm::runtime::WorldConfig::mpi (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig20mPipelineParallelismE">tensorrt_llm::runtime::WorldConfig::mPipelineParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig5mRankE">tensorrt_llm::runtime::WorldConfig::mRank (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig18mTensorParallelismE">tensorrt_llm::runtime::WorldConfig::mTensorParallelism (C++ member)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4NK12tensorrt_llm7runtime11WorldConfig14validMpiConfigEv">tensorrt_llm::runtime::WorldConfig::validMpiConfig (C++ function)</a>
</li>
      <li><a href="_cpp_gen/runtime.html#_CPPv4N12tensorrt_llm7runtime11WorldConfig11WorldConfigE8SizeType8SizeType8SizeType8SizeTypeRKNSt8optionalINSt6vectorI8SizeTypeEEEE">tensorrt_llm::runtime::WorldConfig::WorldConfig (C++ function)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.PretrainedConfig.to_dict">to_dict() (tensorrt_llm.models.PretrainedConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.plugin.html#tensorrt_llm.plugin.PluginConfig.to_legacy_setting">to_legacy_setting() (tensorrt_llm.plugin.PluginConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.to_word_list_format">to_word_list_format() (in module tensorrt_llm.runtime)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.tokens_per_block">tokens_per_block (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.tokens_per_block">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_k">top_k (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p">top_p (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_decay">top_p_decay (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_min">top_p_min (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.top_p_reset_ids">top_p_reset_ids (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.topk">topk() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.transpose">transpose() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.transpose">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.trtllm_modules_to_hf_modules">trtllm_modules_to_hf_modules (tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceStrategy.TWOSHOT">TWOSHOT (tensorrt_llm.functional.AllReduceStrategy attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="U">U</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unary">unary() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.unsqueeze">unsqueeze() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.update">update() (tensorrt_llm.runtime.SamplingConfig method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.update_kv_cache_draft_token_location">update_kv_cache_draft_token_location() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.update_output_ids_by_offset">update_output_ids_by_offset() (tensorrt_llm.runtime.GenerationSession method)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.SamplingConfig.use_beam_hyps">use_beam_hyps (tensorrt_llm.runtime.SamplingConfig attribute)</a>
</li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_context_fmha_for_generation">use_context_fmha_for_generation (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.use_context_fmha_for_generation">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_custom_all_reduce">use_custom_all_reduce (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.use_custom_all_reduce">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_gpt_attention_plugin">use_gpt_attention_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.GPTForCausalLM.use_lora">use_lora() (tensorrt_llm.models.GPTForCausalLM method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.LLaMAForCausalLM.use_lora">(tensorrt_llm.models.LLaMAForCausalLM method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_lora_plugin">use_lora_plugin (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.use_lora_plugin">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.use_mamba_conv1d_plugin">use_mamba_conv1d_plugin (tensorrt_llm.runtime.GenerationSession property)</a>
</li>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.AllReduceConfig.USE_MEMCPY">USE_MEMCPY (tensorrt_llm.functional.AllReduceConfig attribute)</a>
</li>
  </ul></td>
</tr></table>

<h2 id="V">V</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.view">view() (in module tensorrt_llm.functional)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.Tensor.view">(tensorrt_llm.functional.Tensor method)</a>
</li>
      </ul></li>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.GenerationSession.vocab_size">vocab_size (tensorrt_llm.runtime.GenerationSession property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelConfig.vocab_size">(tensorrt_llm.runtime.ModelConfig attribute)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size">(tensorrt_llm.runtime.ModelRunner property)</a>
</li>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunner.vocab_size_padded">vocab_size_padded (tensorrt_llm.runtime.ModelRunner property)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.runtime.html#tensorrt_llm.runtime.ModelRunnerCpp.vocab_size_padded">(tensorrt_llm.runtime.ModelRunnerCpp property)</a>
</li>
      </ul></li>
  </ul></td>
</tr></table>

<h2 id="W">W</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.embedding.Embedding.weight_loader">weight_loader() (tensorrt_llm.layers.embedding.Embedding method)</a>

      <ul>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.Linear.weight_loader">(tensorrt_llm.layers.linear.Linear method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.ParallelLMHead.weight_loader">(tensorrt_llm.layers.linear.ParallelLMHead method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.QKVColumnLinear.weight_loader">(tensorrt_llm.layers.linear.QKVColumnLinear method)</a>
</li>
        <li><a href="python-api/tensorrt_llm.layers.html#tensorrt_llm.layers.linear.RowLinear.weight_loader">(tensorrt_llm.layers.linear.RowLinear method)</a>
</li>
      </ul></li>
  </ul></td>
  <td style="width: 33%; vertical-align: top;"><ul>
      <li><a href="python-api/tensorrt_llm.functional.html#tensorrt_llm.functional.where">where() (in module tensorrt_llm.functional)</a>
</li>
      <li><a href="python-api/tensorrt_llm.models.html#tensorrt_llm.models.WhisperEncoder">WhisperEncoder (class in tensorrt_llm.models)</a>
</li>
  </ul></td>
</tr></table>


           </div>
          </div>
          <footer>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2023, NVIDIA.</p>
  </div>

  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
    provided by <a href="https://readthedocs.org">Read the Docs</a>.


</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      // Passing a function to jQuery() registers it as a DOM-ready callback.
      // The callback asks the theme's Navigation object to enable itself.
      // NOTE(review): SphinxRtdTheme is presumably defined by
      // _static/js/theme.js, and the meaning of the `true` argument is set
      // there -- confirm against that script.
      jQuery(function activateThemeNavigation() {
          var themeNavigation = SphinxRtdTheme.Navigation;
          themeNavigation.enable(true);
      });
  </script>

</body>
</html>